PEP 274: Dictionary Comprehensions - Elegant Dict Creation
TL;DR
PEP 274 introduced dictionary comprehensions in Python 2.7/3.0 with the syntax {key: value for item in iterable}, providing a concise way to create dictionaries, similar to list comprehensions.
Interesting!
Dictionary comprehensions were proposed in 2001, shortly after list comprehensions (PEP 202) had landed in Python 2.0, but PEP 274 was withdrawn once generator expressions passed to dict() covered most of its use cases. The very same syntax was later revived for Python 3.0 and backported to 2.7. The colon form {key: value for ...} was kept because it matches dictionary literal syntax perfectly.
Basic Dictionary Comprehension Syntax
python code snippet start
# Traditional way: start with an empty dict and fill it inside a loop
squares = {}
for x in range(5):
    squares[x] = x ** 2
print(squares)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# Dictionary comprehension: the same mapping built in a single expression
squares = {x: x ** 2 for x in range(5)}
print(squares)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# With string keys
word_lengths = {word: len(word) for word in ['apple', 'banana', 'cherry']}
print(word_lengths)  # {'apple': 5, 'banana': 6, 'cherry': 6}

# Using enumerate for index-based keys
fruits = ['apple', 'banana', 'cherry']
indexed_fruits = {i: fruit for i, fruit in enumerate(fruits)}
print(indexed_fruits)  # {0: 'apple', 1: 'banana', 2: 'cherry'}
python code snippet end
Dictionary Comprehensions with Conditions
python code snippet start
# The trailing `if` clause decides which items make it into the dict
numbers = range(10)
even_squares = {n: n ** 2 for n in numbers if not n % 2}
print(even_squares)  # {0: 0, 2: 4, 4: 16, 6: 36, 8: 64}

# Filtering on a property of the key itself
words = ['apple', 'banana', 'apricot', 'blueberry', 'cherry']
a_words = {w: len(w) for w in words if w.startswith('a')}
print(a_words)  # {'apple': 5, 'apricot': 7}

# Several conditions: consecutive `if` clauses must all hold
scores = {'Alice': 95, 'Bob': 67, 'Charlie': 89, 'Diana': 92, 'Eve': 78}
high_scores = {
    student: points
    for student, points in scores.items()
    if points >= 80
    if len(student) <= 5
}
print(high_scores)  # {'Alice': 95, 'Diana': 92}

# A conditional expression on the value side picks the value per item
numbers = range(-5, 6)
abs_values = {n: (n if n >= 0 else abs(n)) for n in numbers}
print(abs_values)  # {-5: 5, -4: 4, -3: 3, -2: 2, -1: 1, 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}
python code snippet end
Transforming Existing Dictionaries
python code snippet start
# Transform keys: snake_case -> camelCase
def _to_camel_case(snake_name):
    """Return snake_name with underscores dropped and later words capitalized."""
    first, *rest = snake_name.split('_')
    return first + ''.join(part.capitalize() for part in rest)


original = {'first_name': 'John', 'last_name': 'Doe', 'age': 30}
camel_case = {_to_camel_case(key): value for key, value in original.items()}
print(camel_case)  # {'firstName': 'John', 'lastName': 'Doe', 'age': 30}

# Transform values
# round() rounds exact halves to the nearest even integer, hence 2.50 -> 2
prices = {'apple': 1.20, 'banana': 0.80, 'cherry': 2.50}
rounded_prices = {fruit: round(price) for fruit, price in prices.items()}
print(rounded_prices)  # {'apple': 1, 'banana': 1, 'cherry': 2}

# Transform both keys and values
student_scores = {'alice': 85, 'bob': 92, 'charlie': 78}
formatted = {name.title(): f"{score}%" for name, score in student_scores.items()}
print(formatted)  # {'Alice': '85%', 'Bob': '92%', 'Charlie': '78%'}

# Swap keys and values
# Values become keys, so they must be hashable; if two keys shared a value,
# only the last one would survive the swap.
original = {'a': 1, 'b': 2, 'c': 3}
swapped = {value: key for key, value in original.items()}
print(swapped)  # {1: 'a', 2: 'b', 3: 'c'}
python code snippet end
Working with Multiple Iterables
python code snippet start
# zip walks two lists in lockstep, yielding one (key, value) pair per step
keys = ['name', 'age', 'city']
values = ['Alice', 25, 'New York']
person = {field: entry for field, entry in zip(keys, values)}
print(person)  # {'name': 'Alice', 'age': 25, 'city': 'New York'}

# Three parallel lists feeding one nested dict per fruit
fruits = ['apple', 'banana', 'cherry']
colors = ['red', 'yellow', 'dark red']
prices = [1.20, 0.80, 2.50]
fruit_info = {
    name: {'color': shade, 'price': cost}
    for name, (shade, cost) in zip(fruits, zip(colors, prices))
}
print(fruit_info)
# {'apple': {'color': 'red', 'price': 1.2},
# 'banana': {'color': 'yellow', 'price': 0.8},
# 'cherry': {'color': 'dark red', 'price': 2.5}}

# A list of 2-tuples unpacks straight into key and value
pairs = [('x', 1), ('y', 2), ('z', 3)]
coordinates = {axis: position for axis, position in pairs}
print(coordinates)  # {'x': 1, 'y': 2, 'z': 3}
python code snippet end
Nested Dictionary Comprehensions
python code snippet start
# A comprehension whose value is itself a comprehension yields a dict of dicts
students = ['Alice', 'Bob', 'Charlie']
subjects = ['Math', 'Science', 'English']

# Blank grade book: the inner comprehension runs once per student, so every
# student gets an independent {subject: 0} dict
grade_book = {
    pupil: {course: 0 for course in subjects}
    for pupil in students
}
print(grade_book)
# {'Alice': {'Math': 0, 'Science': 0, 'English': 0},
# 'Bob': {'Math': 0, 'Science': 0, 'English': 0},
# 'Charlie': {'Math': 0, 'Science': 0, 'English': 0}}

# Multiplication table stored as row -> {column: product}
matrix = {
    row: {col: row * col for col in range(3)}
    for row in range(3)
}
print(matrix)
# {0: {0: 0, 1: 0, 2: 0},
# 1: {0: 0, 1: 1, 2: 2},
# 2: {0: 0, 1: 2, 2: 4}}

# Flattening: pull one field out of every nested record
nested_data = {
    'user1': {'name': 'Alice', 'age': 25},
    'user2': {'name': 'Bob', 'age': 30},
    'user3': {'name': 'Charlie', 'age': 35},
}

# Extract just names
names = {uid: profile['name'] for uid, profile in nested_data.items()}
print(names)  # {'user1': 'Alice', 'user2': 'Bob', 'user3': 'Charlie'}
python code snippet end
Real-World Examples
Data Aggregation
python code snippet start
# Sales data aggregation
sales_data = [
    {'product': 'apple', 'quantity': 10, 'price': 1.20},
    {'product': 'banana', 'quantity': 15, 'price': 0.80},
    {'product': 'apple', 'quantity': 5, 'price': 1.20},
    {'product': 'cherry', 'quantity': 8, 'price': 2.50},
    {'product': 'banana', 'quantity': 12, 'price': 0.80}
]

# Total revenue by product
from collections import defaultdict

# One pass over the sales; defaultdict(float) starts each new product at 0.0
temp_totals = defaultdict(float)
for sale in sales_data:
    temp_totals[sale['product']] += sale['quantity'] * sale['price']

# The comprehension copies the totals into a plain dict
revenue_by_product = {product: total for product, total in temp_totals.items()}
print(revenue_by_product)  # {'apple': 18.0, 'banana': 21.6, 'cherry': 20.0}
# Using dict comprehension with grouping helper
def group_by_product(sales_data):
    """Return total revenue per product, built with a dict comprehension.

    Args:
        sales_data: A re-iterable collection (e.g. a list) of dicts, each with
            'product', 'quantity' and 'price' keys. It is scanned once to
            collect the product names and once more per product.

    Returns:
        dict mapping each product to the sum of quantity * price over its
        sales, with keys in order of first appearance.
    """
    # dict.fromkeys de-duplicates like set() but keeps first-seen order, so
    # the key order of the result is the same on every run.
    products = dict.fromkeys(sale['product'] for sale in sales_data)
    return {
        product: sum(
            sale['quantity'] * sale['price']
            for sale in sales_data
            if sale['product'] == product
        )
        for product in products
    }
revenue = group_by_product(sales_data)
print(revenue) # Totals: apple 18.0, banana 21.6, cherry 20.0 (key order follows the helper's iteration order)
python code snippet end
Configuration Processing
python code snippet start
# Process configuration with validation
# Every value is a string; the key name decides how convert_value interprets it.
raw_config = {
'database_host': 'localhost',
'database_port': '5432',
'debug_mode': 'true',
'max_connections': '100',
'timeout': '30.5',
# The last two entries convert to None: an empty string, and a '_port' key
# whose value is not an integer.
'empty_value': '',
'invalid_port': 'abc'
}
def convert_value(key, value):
    """Convert a string config value to the type implied by its key.

    Rules are checked in this order, and the first match wins:
      * empty value            -> None
      * key ends with '_port'  -> int
      * key ends with '_mode'  -> bool (True only for 'true', case-insensitive)
      * key == 'timeout'       -> float
      * key starts with 'max_' -> int
      * anything else          -> value unchanged

    Args:
        key: Configuration key (str).
        value: Raw configuration value (str).

    Returns:
        The converted value, or None when the value is empty or the numeric
        conversion raises ValueError.
    """
    if not value:  # Handle empty strings
        return None

    if key.endswith('_port'):
        caster = int
    elif key.endswith('_mode'):
        return value.lower() == 'true'
    elif key == 'timeout':
        caster = float
    elif key.startswith('max_'):
        caster = int
    else:
        return value

    # Single conversion point for all numeric keys
    try:
        return caster(value)
    except ValueError:
        return None
# Process configuration with type conversion
# Convert each entry exactly once (lazily), then keep the successful results.
converted_items = (
    (key, convert_value(key, value))
    for key, value in raw_config.items()
)
processed_config = {
    key: converted
    for key, converted in converted_items
    if converted is not None  # Filter out None values (False is kept)
}
print(processed_config)
# {'database_host': 'localhost', 'database_port': 5432, 'debug_mode': True,
# 'max_connections': 100, 'timeout': 30.5}
python code snippet end
Text Processing
python code snippet start
# Word frequency analysis
text = """
Python is awesome. Python is powerful.
Python makes programming fun. Programming with Python is great!
"""
import re
from collections import Counter

# Clean and split text
words = re.findall(r'\b\w+\b', text.lower())

# Count frequencies using dict comprehension
# list.count rescans the whole list for every distinct word; fine for a short
# text like this one, but quadratic in general (see Counter below).
word_freq = {word: words.count(word) for word in set(words)}
print(word_freq)

# More efficient with Counter, then filter with dict comprehension
counter = Counter(words)
common_words = {word: count for word, count in counter.items() if count > 1}
print(common_words)  # {'python': 4, 'is': 3, 'programming': 2}

# Character frequency (excluding spaces and punctuation)
chars = [c for c in text.lower() if c.isalpha()]
# Counter tallies every character in a single pass over chars
char_freq = dict(Counter(chars))
print({k: v for k, v in sorted(char_freq.items()) if v > 3})
python code snippet end
API Response Processing
python code snippet start
# Sample payload: one dict per user record
api_responses = [
    {'id': 1, 'name': 'Alice', 'status': 'active', 'last_login': '2024-01-15'},
    {'id': 2, 'name': 'Bob', 'status': 'inactive', 'last_login': '2024-01-10'},
    {'id': 3, 'name': 'Charlie', 'status': 'active', 'last_login': '2024-01-20'},
    {'id': 4, 'name': 'Diana', 'status': 'active', 'last_login': None},
]

# Index the records by their 'id' so a user can be fetched by key
users_by_id = {record['id']: record for record in api_responses}
print(users_by_id[1]['name'])  # Alice

# Active users with recent login
from datetime import datetime, timedelta

# Logins earlier than ten days before the current local time count as stale
cutoff_date = datetime.now() - timedelta(days=10)
def parse_date(date_str):
    """Parse a 'YYYY-MM-DD' string into a datetime.

    Args:
        date_str: Date text in '%Y-%m-%d' format, or a falsy value
            (None, '') when no date is available.

    Returns:
        A naive datetime at midnight of that day, or None when date_str is
        falsy or does not match the expected format.
    """
    if not date_str:
        return None
    try:
        return datetime.strptime(date_str, '%Y-%m-%d')
    except ValueError:
        return None
# Parse each active user's login exactly once; inactive users are skipped
# before any parsing happens.
active_logins = (
    (user, parse_date(user['last_login']))
    for user in api_responses
    if user['status'] == 'active'
)
recent_active_users = {
    user['name']: user['last_login']
    for user, login_date in active_logins
    # parse_date gives None for a missing or malformed date
    if login_date is not None and login_date >= cutoff_date
}
print(recent_active_users)
# NOTE: cutoff_date is relative to the current time, so this prints {} once
# the sample logins (January 2024) are more than ten days in the past.

# Status summary
# dict.fromkeys yields each distinct status once, in first-seen order, so the
# summary's key order is the same on every run.
status_summary = {
    status: [user['name'] for user in api_responses if user['status'] == status]
    for status in dict.fromkeys(user['status'] for user in api_responses)
}
print(status_summary)
# {'active': ['Alice', 'Charlie', 'Diana'], 'inactive': ['Bob']}
python code snippet end
Performance Considerations
python code snippet start
import time
from collections import defaultdict
# Large dataset for performance testing
data_size = 100000
large_dataset = [(f"key_{i}", i % 100) for i in range(data_size)]

# Method 1: Traditional loop
# perf_counter() is the high-resolution clock meant for measuring short
# durations; time.time() can be too coarse for a run this quick.
start_time = time.perf_counter()
result1 = {}
for key, value in large_dataset:
    if value % 2 == 0:
        result1[key] = value * 2
loop_time = time.perf_counter() - start_time

# Method 2: Dictionary comprehension
start_time = time.perf_counter()
result2 = {key: value * 2 for key, value in large_dataset if value % 2 == 0}
comp_time = time.perf_counter() - start_time

print(f"Traditional loop: {loop_time:.4f} seconds")
print(f"Dict comprehension: {comp_time:.4f} seconds")
# Guard the ratio: a zero elapsed time must not raise ZeroDivisionError
speedup = loop_time / comp_time if comp_time > 0 else float('inf')
print(f"Speedup: {speedup:.2f}x")

# Verify results are identical
print(f"Results identical: {result1 == result2}")

# Memory usage demonstration
import sys

# sys.getsizeof is shallow: it reports the dict object itself, not the keys
# and values it refers to. Both dicts hold the same 1000 items, so expect the
# two figures to be the same or very close (TODO confirm on your interpreter).
comp_dict = {i: i**2 for i in range(1000)}
comp_size = sys.getsizeof(comp_dict)

trad_dict = {}
for i in range(1000):
    trad_dict[i] = i**2
trad_size = sys.getsizeof(trad_dict)

print(f"Comprehension dict size: {comp_size} bytes")
print(f"Traditional dict size: {trad_size} bytes")
python code snippet end
Advanced Patterns
Conditional Key-Value Pairs
python code snippet start
# The key side of a comprehension can be a computed expression as well
data = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

# Entries with value <= 2 are dropped; keys of the surviving even values are
# upper-cased, keys of odd values are kept as they are
filtered = {
    (letter if number % 2 else letter.upper()): number
    for letter, number in data.items()
    if number > 2
}
print(filtered)  # {'c': 3, 'D': 4, 'e': 5}

# Each value is itself a small record derived from n
numbers = range(1, 11)
complex_dict = {
    f"num_{n}": {
        'value': n,
        'square': n * n,
        'is_even': not n % 2,
        'category': 'large' if n > 5 else 'small',
    }
    for n in numbers
    if n % 3  # Exclude multiples of 3
}
print(complex_dict['num_1'])
# {'value': 1, 'square': 1, 'is_even': False, 'category': 'small'}
python code snippet end
Error Handling in Comprehensions
python code snippet start
# Safe dictionary comprehension with error handling
# (key, raw string) pairs; 'c' carries non-numeric text and 'e' an empty
# string, so neither can be passed to int() directly.
messy_data = [
('a', '1'), ('b', '2'), ('c', 'invalid'),
('d', '4'), ('e', ''), ('f', '6')
]
def safe_int_convert(value, default=0):
    """Convert value to int, falling back to default when that is impossible.

    Args:
        value: Anything int() might accept (str, number, ...).
        default: Returned when int(value) raises ValueError or TypeError,
            e.g. for '', 'invalid' or None.

    Returns:
        int(value) on success, otherwise default. A real zero (0, '0') is
        converted normally rather than being treated as missing.
    """
    # int('') raises ValueError and int(None) raises TypeError, so no separate
    # emptiness check is needed -- and one would wrongly discard a genuine 0.
    try:
        return int(value)
    except (ValueError, TypeError):
        return default
# Method 1: Helper function
safe_dict = {key: safe_int_convert(value) for key, value in messy_data}
print(safe_dict)  # {'a': 1, 'b': 2, 'c': 0, 'd': 4, 'e': 0, 'f': 6}

# Method 2: Filter out invalid entries
# str.isdecimal() is used rather than isdigit(): isdigit() is also True for
# characters such as '²' that int() rejects with ValueError.
valid_only = {
    key: int(value)
    for key, value in messy_data
    if value and value.isdecimal()
}
print(valid_only)  # {'a': 1, 'b': 2, 'd': 4, 'f': 6}

# Method 3: Use conditional expression
# Every key is kept; unparseable values become None instead of being dropped
conditional_dict = {
    key: int(value) if value and value.isdecimal() else None
    for key, value in messy_data
}
print(conditional_dict)
# {'a': 1, 'b': 2, 'c': None, 'd': 4, 'e': None, 'f': 6}
python code snippet end
Best Practices
python code snippet start
# Sample inputs so the examples below run on their own
users = [
    {'name': 'Alice', 'age': 30},
    {'name': 'Bob', 'age': 17},
    {'name': 'Charlie', 'age': 18},
]
data = {'max_size': 4, 'user_name': 'john smith', 'id': 7, 'ratio': 0.5}

# Good: Clear and readable
user_ages = {user['name']: user['age'] for user in users if user['age'] >= 18}

# Avoid: Too complex for a single comprehension
# This is hard to read and debug
complex_bad = {
    k.upper().replace('_', '-'):
        v**2 if isinstance(v, int) and v > 0 else str(v).title()
    for k, v in data.items()
    if len(str(k)) > 2 and (isinstance(v, int) or isinstance(v, str))
}
# Better: Break into steps
def transform_key(key):
    """Return key upper-cased, with every underscore replaced by a hyphen."""
    return key.upper().replace('_', '-')
def transform_value(value):
    """Square positive ints; title-case the string form of anything else.

    This mirrors the inline expression
    ``v**2 if isinstance(v, int) and v > 0 else str(v).title()``,
    so zero and negative ints come back as strings ('0', '-3').

    Args:
        value: The value to transform.

    Returns:
        value**2 for a positive int, otherwise str(value).title().
    """
    if isinstance(value, int) and value > 0:
        return value**2
    return str(value).title()
def is_valid_item(key, value):
    """Return True when the key has more than two characters and the value is an int or str.

    Args:
        key: Dictionary key; its str() form is what gets measured.
        value: Dictionary value to type-check.
    """
    return len(str(key)) > 2 and isinstance(value, (int, str))
# Filter, key transform and value transform each live in a named helper
complex_good = {
    transform_key(name): transform_value(item)
    for name, item in data.items()
    if is_valid_item(name, item)
}
# Use meaningful variable names
vocabulary = ['apple', 'banana', 'cherry']  # sample input so the snippet runs
vocab = vocabulary

# Good: the name says what the mapping holds (word -> length)
word_lengths = {word: len(word) for word in vocabulary}
word_counts = word_lengths  # earlier name, kept so existing references still work

# Less clear
d = {w: len(w) for w in vocab}
# When working with nested data, consider readability
# Sometimes a regular loop is clearer than nested comprehensions
nested_data = {
    'users': [
        {'name': 'Alice', 'groups': ['admin', 'users']},
        {'name': 'Bob', 'groups': ['users']},
        {'name': 'Charlie', 'groups': ['admin', 'users', 'moderators']},
    ],
}

# Complex but readable: one loop, one filter, one key/value pair
user_permissions = {
    member['name']: member['groups']
    for member in nested_data['users']
    if 'admin' in member['groups']
}
# For very complex transformations, regular functions might be clearer
def process_user_data(users):
    """Summarize the admin users found in users.

    Args:
        users: Iterable of dicts, each with a 'name' and a 'groups' list.

    Returns:
        dict mapping each admin's name to a record with their 'groups' (the
        same list object as in the input), 'is_admin' (always True) and
        'group_count'. Users without the 'admin' group are left out.
    """
    result = {}
    for user in users:
        groups = user['groups']
        if 'admin' not in groups:
            continue
        result[user['name']] = {
            'groups': groups,
            'is_admin': True,
            'group_count': len(groups)
        }
    return result
python code snippet end
Dictionary comprehensions make Python code more elegant and often more efficient, transforming the verbose loop-based dictionary creation into concise, readable expressions that clearly show the intent.
Reference: PEP 274 - Dict Comprehensions