Skip to main content Brad's PyNotes

PEP 274: Dictionary Comprehensions - Elegant Dict Creation

TL;DR

PEP 274 introduced dictionary comprehensions in Python 2.7/3.0 with syntax {key: value for item in iterable}, providing a concise way to create dictionaries similar to list comprehensions.

Interesting!

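Dictionary comprehensions were first proposed in 2001, shortly after list comprehensions (PEP 202) arrived in Python 2.0. PEP 274 was then withdrawn, because passing a generator expression to dict() covered most of the same use cases, and the feature only landed years later in Python 3.0 (and was backported to 2.7). The {key: value for ...} form was chosen because it matches dictionary literal syntax perfectly.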

Basic Dictionary Comprehension Syntax

python code snippet start

# Traditional way to create a dictionary
squares = {}
for x in range(5):
    squares[x] = x ** 2
print(squares)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# Dictionary comprehension
squares = {x: x ** 2 for x in range(5)}
print(squares)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# With string keys
word_lengths = {word: len(word) for word in ['apple', 'banana', 'cherry']}
print(word_lengths)  # {'apple': 5, 'banana': 6, 'cherry': 6}

# Using enumerate for index-based keys
fruits = ['apple', 'banana', 'cherry']
indexed_fruits = {i: fruit for i, fruit in enumerate(fruits)}
print(indexed_fruits)  # {0: 'apple', 1: 'banana', 2: 'cherry'}

python code snippet end

Dictionary Comprehensions with Conditions

python code snippet start

# Filter with conditions
numbers = range(10)
even_squares = {x: x ** 2 for x in numbers if x % 2 == 0}
print(even_squares)  # {0: 0, 2: 4, 4: 16, 6: 36, 8: 64}

# Complex filtering
words = ['apple', 'banana', 'apricot', 'blueberry', 'cherry']
a_words = {word: len(word) for word in words if word.startswith('a')}
print(a_words)  # {'apple': 5, 'apricot': 7}

# Multiple conditions
scores = {'Alice': 95, 'Bob': 67, 'Charlie': 89, 'Diana': 92, 'Eve': 78}
high_scores = {name: score for name, score in scores.items() 
               if score >= 80 and len(name) <= 5}
print(high_scores)  # {'Alice': 95, 'Diana': 92}

# Conditional values
numbers = range(-5, 6)
abs_values = {x: abs(x) if x < 0 else x for x in numbers}
print(abs_values)  # {-5: 5, -4: 4, -3: 3, -2: 2, -1: 1, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}

python code snippet end

Transforming Existing Dictionaries

python code snippet start

# Transform keys: snake_case -> camelCase
original = {'first_name': 'John', 'last_name': 'Doe', 'age': 30}

def to_camel_case(name):
    """Convert a snake_case name to camelCase.

    The first word is kept as-is; every following word is capitalized and
    the underscores are dropped. Names without underscores are unchanged.
    """
    first, *rest = name.split('_')
    return first + ''.join(word.capitalize() for word in rest)

camel_case = {to_camel_case(key): value for key, value in original.items()}
print(camel_case)  # {'firstName': 'John', 'lastName': 'Doe', 'age': 30}

# Transform values
prices = {'apple': 1.20, 'banana': 0.80, 'cherry': 2.50}
rounded_prices = {fruit: round(price) for fruit, price in prices.items()}
print(rounded_prices)  # {'apple': 1, 'banana': 1, 'cherry': 2}

# Transform both keys and values
student_scores = {'alice': 85, 'bob': 92, 'charlie': 78}
formatted = {name.title(): f"{score}%" for name, score in student_scores.items()}
print(formatted)  # {'Alice': '85%', 'Bob': '92%', 'Charlie': '78%'}

# Swap keys and values
original = {'a': 1, 'b': 2, 'c': 3}
swapped = {value: key for key, value in original.items()}
print(swapped)  # {1: 'a', 2: 'b', 3: 'c'}

python code snippet end

Working with Multiple Iterables

python code snippet start

# Using zip for parallel iteration
keys = ['name', 'age', 'city']
values = ['Alice', 25, 'New York']
person = {key: value for key, value in zip(keys, values)}
print(person)  # {'name': 'Alice', 'age': 25, 'city': 'New York'}

# Multiple lists
fruits = ['apple', 'banana', 'cherry']
colors = ['red', 'yellow', 'dark red']
prices = [1.20, 0.80, 2.50]

fruit_info = {
    fruit: {'color': color, 'price': price}
    for fruit, color, price in zip(fruits, colors, prices)
}
print(fruit_info)
# {'apple': {'color': 'red', 'price': 1.2}, 
#  'banana': {'color': 'yellow', 'price': 0.8}, 
#  'cherry': {'color': 'dark red', 'price': 2.5}}

# Dictionary from pairs
pairs = [('x', 1), ('y', 2), ('z', 3)]
coordinates = {letter: number for letter, number in pairs}
print(coordinates)  # {'x': 1, 'y': 2, 'z': 3}

python code snippet end

Nested Dictionary Comprehensions

python code snippet start

# Creating nested dictionaries
students = ['Alice', 'Bob', 'Charlie']
subjects = ['Math', 'Science', 'English']

# Initialize grade book
grade_book = {
    student: {subject: 0 for subject in subjects}
    for student in students
}
print(grade_book)
# {'Alice': {'Math': 0, 'Science': 0, 'English': 0},
#  'Bob': {'Math': 0, 'Science': 0, 'English': 0},
#  'Charlie': {'Math': 0, 'Science': 0, 'English': 0}}

# Matrix as nested dict
matrix = {
    i: {j: i * j for j in range(3)}
    for i in range(3)
}
print(matrix)
# {0: {0: 0, 1: 0, 2: 0},
#  1: {0: 0, 1: 1, 2: 2},
#  2: {0: 0, 1: 2, 2: 4}}

# Flattening nested structure
nested_data = {
    'user1': {'name': 'Alice', 'age': 25},
    'user2': {'name': 'Bob', 'age': 30},
    'user3': {'name': 'Charlie', 'age': 35}
}

# Extract just names
names = {user_id: user_data['name'] for user_id, user_data in nested_data.items()}
print(names)  # {'user1': 'Alice', 'user2': 'Bob', 'user3': 'Charlie'}

python code snippet end

Real-World Examples

Data Aggregation

python code snippet start

# Sales data aggregation
sales_data = [
    {'product': 'apple', 'quantity': 10, 'price': 1.20},
    {'product': 'banana', 'quantity': 15, 'price': 0.80},
    {'product': 'apple', 'quantity': 5, 'price': 1.20},
    {'product': 'cherry', 'quantity': 8, 'price': 2.50},
    {'product': 'banana', 'quantity': 12, 'price': 0.80}
]

# Total revenue by product
from collections import defaultdict
temp_totals = defaultdict(float)
for sale in sales_data:
    temp_totals[sale['product']] += sale['quantity'] * sale['price']

revenue_by_product = {product: total for product, total in temp_totals.items()}
print(revenue_by_product)  # {'apple': 18.0, 'banana': 21.6, 'cherry': 20.0}

# Using dict comprehension with grouping helper
def group_by_product(sales_data):
    """Return the total revenue for each distinct product.

    Every sale is a mapping with 'product', 'quantity' and 'price' keys.
    The result maps each product to the sum of quantity * price over the
    sales recorded for that product.
    """
    product_names = {entry['product'] for entry in sales_data}

    def revenue_for(name):
        # Rescan the sales and keep only the rows belonging to this product.
        return sum(
            entry['quantity'] * entry['price']
            for entry in sales_data
            if entry['product'] == name
        )

    return {name: revenue_for(name) for name in product_names}

revenue = group_by_product(sales_data)
print(revenue)  # {'banana': 21.6, 'cherry': 20.0, 'apple': 18.0}

python code snippet end

Configuration Processing

python code snippet start

# Process configuration with validation
raw_config = {
    'database_host': 'localhost',
    'database_port': '5432',
    'debug_mode': 'true',
    'max_connections': '100',
    'timeout': '30.5',
    'empty_value': '',
    'invalid_port': 'abc'
}

def convert_value(key, value):
    """Convert a raw configuration string to a type chosen from its key.

    Rules are checked in this order:
      * keys ending in '_port'    -> int
      * keys ending in '_mode'    -> bool (True only for 'true', any case)
      * the key 'timeout'         -> float
      * keys starting with 'max_' -> int
      * anything else             -> returned unchanged

    Returns None for empty values and for values that fail numeric parsing.
    """
    if not value:  # Empty strings carry no usable setting
        return None

    # Pick the numeric parser for this key; non-numeric keys return early.
    if key.endswith('_port'):
        parse = int
    elif key.endswith('_mode'):
        return value.lower() == 'true'
    elif key == 'timeout':
        parse = float
    elif key.startswith('max_'):
        parse = int
    else:
        return value

    try:
        return parse(value)
    except ValueError:
        return None

# Process configuration with type conversion.
# Each value is converted exactly once (lazily, via the generator below);
# the comprehension then keeps only the entries that converted successfully.
converted_items = (
    (key, convert_value(key, value))
    for key, value in raw_config.items()
)
processed_config = {
    key: converted
    for key, converted in converted_items
    if converted is not None  # Filter out None values
}

print(processed_config)
# {'database_host': 'localhost', 'database_port': 5432, 'debug_mode': True, 
#  'max_connections': 100, 'timeout': 30.5}

python code snippet end

Text Processing

python code snippet start

# Word frequency analysis
text = """
Python is awesome. Python is powerful.
Python makes programming fun. Programming with Python is great!
"""

import re
from collections import Counter

# Clean and split text
words = re.findall(r'\b\w+\b', text.lower())

# Count frequencies using dict comprehension
word_freq = {word: words.count(word) for word in set(words)}
print(word_freq)

# More efficient with Counter, then filter with dict comprehension
counter = Counter(words)
common_words = {word: count for word, count in counter.items() if count > 1}
print(common_words)  # {'python': 4, 'is': 3, 'programming': 2}

# Character frequency (excluding spaces and punctuation)
chars = [c for c in text.lower() if c.isalpha()]
char_freq = {char: chars.count(char) for char in set(chars)}
print({k: v for k, v in sorted(char_freq.items()) if v > 3})

python code snippet end

API Response Processing

python code snippet start

# Process API responses
api_responses = [
    {'id': 1, 'name': 'Alice', 'status': 'active', 'last_login': '2024-01-15'},
    {'id': 2, 'name': 'Bob', 'status': 'inactive', 'last_login': '2024-01-10'},
    {'id': 3, 'name': 'Charlie', 'status': 'active', 'last_login': '2024-01-20'},
    {'id': 4, 'name': 'Diana', 'status': 'active', 'last_login': None}
]

# Create lookup dictionary by ID
users_by_id = {user['id']: user for user in api_responses}
print(users_by_id[1]['name'])  # Alice

# Active users with recent login
from datetime import datetime, timedelta
cutoff_date = datetime.now() - timedelta(days=10)

def parse_date(date_str):
    """Parse a 'YYYY-MM-DD' string into a datetime.

    Returns None when date_str is empty/None or does not match the format.
    """
    if not date_str:
        return None
    try:
        parsed = datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        parsed = None
    return parsed

def is_recently_active(user):
    """Return True for an active user whose last login is on or after cutoff_date."""
    if user['status'] != 'active':
        return False
    # parse_date returns None for missing or malformed dates, so a single
    # call covers both "no login recorded" and "unparseable date".
    last_login = parse_date(user['last_login'])
    return last_login is not None and last_login >= cutoff_date

recent_active_users = {
    user['name']: user['last_login']
    for user in api_responses
    if is_recently_active(user)
}
print(recent_active_users)

# Status summary
status_summary = {
    status: [user['name'] for user in api_responses if user['status'] == status]
    for status in set(user['status'] for user in api_responses)
}
print(status_summary)
# {'active': ['Alice', 'Charlie', 'Diana'], 'inactive': ['Bob']}

python code snippet end

Performance Considerations

python code snippet start

import time
from collections import defaultdict

# Large dataset for performance testing
data_size = 100000
large_dataset = [(f"key_{i}", i % 100) for i in range(data_size)]

# time.perf_counter() is the clock intended for measuring short intervals:
# it is monotonic and uses the highest resolution available, whereas
# time.time() can be coarse and can jump if the system clock is adjusted.

# Method 1: Traditional loop
start_time = time.perf_counter()
result1 = {}
for key, value in large_dataset:
    if value % 2 == 0:
        result1[key] = value * 2
loop_time = time.perf_counter() - start_time

# Method 2: Dictionary comprehension
start_time = time.perf_counter()
result2 = {key: value * 2 for key, value in large_dataset if value % 2 == 0}
comp_time = time.perf_counter() - start_time

print(f"Traditional loop: {loop_time:.4f} seconds")
print(f"Dict comprehension: {comp_time:.4f} seconds")
# Guard the ratio: a zero elapsed time would raise ZeroDivisionError
if comp_time > 0:
    print(f"Speedup: {loop_time/comp_time:.2f}x")

# Verify results are identical
print(f"Results identical: {result1 == result2}")

# Memory usage demonstration
# NOTE: sys.getsizeof is shallow - it reports the dict object's own footprint,
# not the keys and values it references.
import sys

# Build the dict with a comprehension
# (in CPython the items are still inserted one at a time; the dict is not pre-sized)
comp_dict = {i: i**2 for i in range(1000)}
comp_size = sys.getsizeof(comp_dict)

# Build the same dict with an explicit loop - the same sequence of insertions
trad_dict = {}
for i in range(1000):
    trad_dict[i] = i**2
trad_size = sys.getsizeof(trad_dict)

# Expect the two sizes to match in CPython: the comprehension's advantage is
# speed and readability, not a smaller dict.
print(f"Comprehension dict size: {comp_size} bytes")
print(f"Traditional dict size: {trad_size} bytes")

python code snippet end

Advanced Patterns

Conditional Key-Value Pairs

python code snippet start

# Create dict with conditional keys
data = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

# Include key only if condition is met
filtered = {
    (key.upper() if value % 2 == 0 else key): value
    for key, value in data.items()
    if value > 2
}
print(filtered)  # {'c': 3, 'D': 4, 'e': 5}

# Multiple transformations
numbers = range(1, 11)
complex_dict = {
    f"num_{n}": {
        'value': n,
        'square': n**2,
        'is_even': n % 2 == 0,
        'category': 'small' if n <= 5 else 'large'
    }
    for n in numbers
    if n % 3 != 0  # Exclude multiples of 3
}

print(complex_dict['num_1'])
# {'value': 1, 'square': 1, 'is_even': False, 'category': 'small'}

python code snippet end

Error Handling in Comprehensions

python code snippet start

# Safe dictionary comprehension with error handling
messy_data = [
    ('a', '1'), ('b', '2'), ('c', 'invalid'), 
    ('d', '4'), ('e', ''), ('f', '6')
]

def safe_int_convert(value, default=0):
    """Safely convert to int with default

    Returns int(value) when the conversion succeeds, otherwise `default`.
    Empty strings, None, non-numeric text and infinities all fall back to
    `default`. A genuine zero (0, 0.0, '0') converts to 0 rather than being
    mistaken for a missing value.
    """
    try:
        # Let int() decide what is convertible instead of testing truthiness,
        # which would wrongly treat the number 0 as "no value".
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: '' / 'invalid' / NaN; TypeError: None and other
        # unsupported types; OverflowError: float infinities.
        return default

# Method 1: Helper function
safe_dict = {key: safe_int_convert(value) for key, value in messy_data}
print(safe_dict)  # {'a': 1, 'b': 2, 'c': 0, 'd': 4, 'e': 0, 'f': 6}

# Method 2: Filter out invalid entries
# str.isdecimal() is used rather than str.isdigit(): isdigit() also accepts
# characters such as '²' that int() cannot parse, which would raise ValueError.
# Note that signed values like '-4' are rejected by this check.
valid_only = {
    key: int(value)
    for key, value in messy_data
    if value and value.isdecimal()
}
print(valid_only)  # {'a': 1, 'b': 2, 'd': 4, 'f': 6}

# Method 3: Use conditional expression
# Same validity test, but invalid entries are kept with a None placeholder.
conditional_dict = {
    key: int(value) if value and value.isdecimal() else None
    for key, value in messy_data
}
print(conditional_dict)
# {'a': 1, 'b': 2, 'c': None, 'd': 4, 'e': None, 'f': 6}

python code snippet end

Best Practices

python code snippet start

# Sample data so the snippet runs on its own
users = [
    {'name': 'Alice', 'age': 30},
    {'name': 'Bob', 'age': 17},
    {'name': 'Charlie', 'age': 18},
]

# Good: Clear and readable
user_ages = {user['name']: user['age'] for user in users if user['age'] >= 18}

# Avoid: Too complex for a single comprehension
# This is hard to read and debug
complex_bad = {
    k.upper().replace('_', '-'): 
    v**2 if isinstance(v, int) and v > 0 else str(v).title() 
    for k, v in data.items() 
    if len(str(k)) > 2 and (isinstance(v, int) or isinstance(v, str))
}

# Better: Break into steps
def transform_key(key):
    """Return key upper-cased, with underscores replaced by hyphens."""
    shouted = key.upper()
    return shouted.replace('_', '-')

def transform_value(value):
    """Square positive ints, title-case strings, pass anything else through.

    Non-positive ints and values of other types are returned unchanged.
    """
    if isinstance(value, str):
        return value.title()
    if isinstance(value, int) and value > 0:
        return value ** 2
    return value

def is_valid_item(key, value):
    """Return True when str(key) is longer than 2 characters and value is an int or str."""
    if len(str(key)) <= 2:
        return False
    return isinstance(value, (int, str))

complex_good = {
    transform_key(k): transform_value(v)
    for k, v in data.items()
    if is_valid_item(k, v)
}

# Sample data so the snippet runs on its own
vocabulary = ['apple', 'banana', 'cherry']

# Use meaningful variable names
# Good: the name says what the values are (lengths, not counts)
word_lengths = {word: len(word) for word in vocabulary}

# Less clear: single-letter names hide what is being mapped
d = {w: len(w) for w in vocabulary}

# When working with nested data, consider readability
# Sometimes a regular loop is clearer than nested comprehensions
nested_data = {
    'users': [
        {'name': 'Alice', 'groups': ['admin', 'users']},
        {'name': 'Bob', 'groups': ['users']},
        {'name': 'Charlie', 'groups': ['admin', 'users', 'moderators']}
    ]
}

# Complex but readable
user_permissions = {
    user['name']: user['groups']
    for user in nested_data['users']
    if 'admin' in user['groups']
}

# For very complex transformations, regular functions might be clearer
def process_user_data(users):
    """Summarize the admin users.

    Returns a dict mapping each admin's name to their groups, an is_admin
    flag and the number of groups they belong to. Users without the 'admin'
    group are left out.
    """
    admins = {}
    for entry in users:
        groups = entry['groups']
        if 'admin' not in groups:
            continue  # Only admins are reported
        admins[entry['name']] = {
            'groups': groups,
            'is_admin': True,
            'group_count': len(groups),
        }
    return admins

python code snippet end

Dictionary comprehensions make Python code more elegant and often more efficient, transforming the verbose loop-based dictionary creation into concise, readable expressions that clearly show the intent.

Reference: PEP 274 - Dict Comprehensions