Pickle Module
TL;DR
The pickle module serializes Python objects to bytes and deserializes them back - enabling object persistence and inter-process communication.
Interesting!
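Pickle can serialize almost any Python object, including nested containers and instances of your own classes. Functions and classes are pickled by reference (their importable name), not by value, so they must be importable when unpickling - and never unpickle untrusted data, as it can execute arbitrary code!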
Basic Serialization
python code snippet start
import pickle
# Serialize to file (pickle is a binary format, so open in 'wb' mode)
data = {'name': 'Alice', 'scores': [95, 87, 92], 'active': True}
with open('data.pickle', 'wb') as f:
    pickle.dump(data, f)

# Deserialize from file ('rb' mode); only load pickle files you created
# yourself or otherwise trust.
with open('data.pickle', 'rb') as f:
    loaded_data = pickle.load(f)

print(loaded_data)  # {'name': 'Alice', 'scores': [95, 87, 92], 'active': True}
python code snippet end
Bytes Serialization
python code snippet start
# In-memory round trip: dumps() produces a bytes object, loads() rebuilds
# the value from it.
original_list = [
    1,
    2,
    3,
    {'a': 'hello', 'b': [4, 5]},
]
pickled_bytes = pickle.dumps(original_list)
restored_list = pickle.loads(pickled_bytes)

print(restored_list)  # [1, 2, 3, {'a': 'hello', 'b': [4, 5]}]

# The restored value is a brand-new object that compares equal to the
# original; it is not another reference to it.
print(restored_list is original_list)  # False
print(restored_list == original_list)  # True
python code snippet end
Complex Objects
python code snippet start
class Person:
    """Simple record type used to demonstrate pickling a custom object."""

    def __init__(self, name: str, age: int) -> None:
        """Store the person's name and age as instance attributes."""
        self.name = name
        self.age = age

    def greet(self) -> str:
        """Return a greeting that includes the person's name."""
        return f"Hello, I'm {self.name}"
# Round-trip an instance of a user-defined class through pickle.
person = Person(name="Bob", age=30)
pickled_person = pickle.dumps(person)

# Deserialize (the Person class must be available where loads() runs)
restored_person = pickle.loads(pickled_person)
greeting = restored_person.greet()
print(greeting)  # Hello, I'm Bob
python code snippet end
Protocol Versions
python code snippet start
# Sample payload: the five-element pattern repeated 1000 times.
data = 1000 * [1, 2, 3, 4, 5]

# Pickle the same payload with two explicit protocol numbers...
pickle_v4, pickle_v5 = (
    pickle.dumps(data, protocol=version) for version in (4, 5)
)
# ...and with the newest protocol this interpreter supports.
pickle_highest = pickle.dumps(data, pickle.HIGHEST_PROTOCOL)

print(f"Protocol 4 size: {len(pickle_v4)}")
print(f"Protocol 5 size: {len(pickle_v5)}")
print(f"Highest protocol: {pickle.HIGHEST_PROTOCOL}")
python code snippet end
What Can’t Be Pickled
python code snippet start
import pickle
import tempfile

# These cannot be pickled. The exception type depends on the object:
# - Lambda functions                       -> pickle.PicklingError
# - Local functions (defined inside other functions)
#                                          -> AttributeError or PicklingError,
#                                             depending on the Python version
# - Open file objects                      -> TypeError
# - Database connections                   -> TypeError
# - Thread locks                           -> TypeError

# Lambda fails: functions are pickled by name, and a lambda has no
# importable name to look up.
try:
    pickle.dumps(lambda x: x * 2)
except (pickle.PicklingError, AttributeError) as e:
    print(f"Can't pickle lambda: {e}")

# Open file fails. A temporary file is used so the demo does not create or
# truncate a real file in the current working directory.
try:
    with tempfile.TemporaryFile('w') as f:
        pickle.dumps(f)
except (pickle.PicklingError, TypeError) as e:
    print(f"Can't pickle file: {e}")
python code snippet end
Security Warning
python code snippet start
# DANGEROUS - never do this with untrusted data!
# A pickle payload can name any importable callable to run during loading,
# so bytes from an untrusted source can execute arbitrary code.
malicious_data = b"arbitrary bytes that could execute code"

# This could be dangerous:
# obj = pickle.loads(malicious_data)  # DON'T DO THIS!

# For untrusted data, use safer alternatives:
import json

# JSON is safer for simple data: parsing it only builds basic types
# (dict, list, str, numbers, bool, None).
safe_data = {"name": "Alice", "age": 30}
json_str = json.dumps(safe_data)
loaded_safe = json.loads(json_str)
python code snippet end
Practical Applications
python code snippet start
# Cache expensive computations
import pickle
import os
def expensive_computation(x):
    """Return ``x ** 2 + x * 3 + 42``.

    Stands in for a slow computation whose result is worth caching.
    """
    # Simulate expensive operation
    return x ** 2 + x * 3 + 42
def cached_computation(x):
    """Return ``expensive_computation(x)``, cached on disk as a pickle.

    The result for each ``x`` is stored in ``cache_{x}.pickle`` in the
    current working directory. If that file can be read and unpickled, its
    contents are returned; otherwise the value is computed, written to the
    file, and returned.

    SECURITY: the cache file is loaded with ``pickle.load``. Only use this
    where nobody untrusted can write to the cache location.
    """
    cache_file = f'cache_{x}.pickle'

    # Try the read directly instead of checking os.path.exists() first: the
    # file could disappear between the check and the open.
    try:
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        pass  # Cache miss: fall through and compute.
    except (EOFError, pickle.UnpicklingError):
        pass  # Truncated or corrupt cache file: recompute and overwrite it.

    result = expensive_computation(x)
    with open(cache_file, 'wb') as f:
        pickle.dump(result, f)
    return result
# Send objects between processes
from multiprocessing import Queue
import pickle
def worker_process(queue):
    """Consume pickled tasks from ``queue`` until a ``None`` sentinel arrives.

    Args:
        queue: Object with a blocking ``get()`` method that yields
            pickled task bytes, followed by ``None`` to signal shutdown.

    Each item is unpickled with ``pickle.loads``, so only feed this queue
    from producers you trust.
    """
    # The None sentinel ends the loop; it is never passed to pickle.loads().
    while (pickled_task := queue.get()) is not None:
        task = pickle.loads(pickled_task)
        # Process task...
python code snippet end
Pickle is perfect for caching, inter-process communication, and object persistence - just remember the golden rule: only unpickle data you trust! Use pickle with multiprocessing communication and database BLOB storage. For data that may come from untrusted sources, consider safer formats such as JSON for simple data, and use pathlib to manage the files you persist to.
Reference: pickle — Python object serialization