Skip to main content Brad's PyNotes

Pickle Module

TL;DR

The pickle module serializes Python objects to bytes and deserializes them back - enabling object persistence and inter-process communication.

Interesting!

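Pickle can serialize almost any Python object, including nested objects and instances of your own classes. Functions and classes themselves are pickled by reference (their importable name), not by value - and never unpickle untrusted data, as it can execute arbitrary code!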

Basic Serialization

python code snippet start

import pickle

# A plain dict keeps the round trip easy to inspect
data = {'name': 'Alice', 'scores': [95, 87, 92], 'active': True}

# Write the pickle stream to disk (binary mode)
with open('data.pickle', 'wb') as sink:
    pickle.dump(data, sink)

# Read the object back from disk (binary mode again)
with open('data.pickle', 'rb') as source:
    loaded_data = pickle.load(source)

print(loaded_data)  # {'name': 'Alice', 'scores': [95, 87, 92], 'active': True}

python code snippet end

Bytes Serialization

python code snippet start

# Build the pickle stream in memory instead of writing it to a file
original_list = [1, 2, 3, {'a': 'hello', 'b': [4, 5]}]
pickled_bytes = pickle.dumps(original_list)

# Rebuild the object straight from those bytes
restored_list = pickle.loads(pickled_bytes)
print(restored_list)  # [1, 2, 3, {'a': 'hello', 'b': [4, 5]}]

# The round trip yields an equal but distinct object
print(restored_list is original_list)  # False
print(restored_list == original_list)  # True

python code snippet end

Complex Objects

python code snippet start

class Person:
    """A named person with an age, used to demonstrate pickling custom objects."""

    def __init__(self, name, age):
        """Store the given name and age on the instance."""
        self.name, self.age = name, age

    def greet(self):
        """Return a first-person greeting that includes the person's name."""
        greeting = f"Hello, I'm {self.name}"
        return greeting

# Pickle an instance of a user-defined class
person = Person("Bob", 30)
pickled_person = pickle.dumps(person)

# Unpickling needs the Person class to be available in this program
restored_person = pickle.loads(pickled_person)
greeting = restored_person.greet()
print(greeting)  # Hello, I'm Bob

python code snippet end

Protocol Versions

python code snippet start

# A repetitive payload, large enough for the encoded sizes to be worth comparing
data = 1000 * [1, 2, 3, 4, 5]

# Encode the same payload under two explicit protocols...
pickle_v4, pickle_v5 = (
    pickle.dumps(data, protocol=version) for version in (4, 5)
)
# ...and under the newest protocol this interpreter supports
pickle_highest = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

print(f"Protocol 4 size: {len(pickle_v4)}")
print(f"Protocol 5 size: {len(pickle_v5)}")
print(f"Highest protocol: {pickle.HIGHEST_PROTOCOL}")

python code snippet end

What Can’t Be Pickled

python code snippet start

import pickle

import os

# None of these can be pickled:
# - Lambda functions
# - Local functions (defined inside other functions)
# - Open file objects
# - Database connections
# - Thread locks
#
# Depending on the object (and the Python version) the failure surfaces as
# pickle.PicklingError, AttributeError or TypeError, so catch all three.
UNPICKLABLE_ERRORS = (pickle.PicklingError, AttributeError, TypeError)

# Lambda fails
try:
    pickle.dumps(lambda x: x * 2)
except UNPICKLABLE_ERRORS as e:
    print(f"Can't pickle lambda: {e}")

# Open file fails
# os.devnull provides a real open file object without creating or
# truncating a file in the working directory.
try:
    with open(os.devnull, 'w') as f:
        pickle.dumps(f)
except UNPICKLABLE_ERRORS as e:
    print(f"Can't pickle file: {e}")

python code snippet end

Security Warning

python code snippet start

# DANGEROUS - bytes from an untrusted source must never reach pickle!
malicious_data = b"arbitrary bytes that could execute code"

# Unpickling bytes like these could run attacker-chosen code:
# obj = pickle.loads(malicious_data)  # DON'T DO THIS!

# For untrusted data, reach for a data-only format instead:
import json

# JSON round trip for simple data
safe_data = {"name": "Alice", "age": 30}
json_str = json.dumps(safe_data)
loaded_safe = json.loads(json_str)

python code snippet end

Practical Applications

python code snippet start

# Cache expensive computations
import pickle
import os

def expensive_computation(x):
    """Stand-in for a costly calculation: x squared, plus three times x, plus 42."""
    squared = pow(x, 2)
    tripled = x * 3
    return squared + tripled + 42

def cached_computation(x, compute=None):
    """Return the result for ``x``, reusing a pickled copy on disk when possible.

    The result is stored in ``cache_{x}.pickle`` in the current working
    directory. A missing, empty or truncated cache file is treated as a
    cache miss: the value is recomputed and the file is rewritten.

    Args:
        x: Input value; it is also used to build the cache file name.
        compute: Optional callable that produces the result for ``x``.
            Defaults to ``expensive_computation``.

    Returns:
        The cached result if a readable cache file exists, otherwise the
        freshly computed result.

    NOTE: ``pickle.load`` can execute arbitrary code, so the cache files
    must live somewhere only trusted code can write.
    """
    if compute is None:
        compute = expensive_computation

    cache_file = f'cache_{x}.pickle'

    # Try the read directly instead of checking os.path.exists() first:
    # the file could disappear between the check and the open.
    try:
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        pass  # plain cache miss
    except (EOFError, pickle.UnpicklingError):
        pass  # empty or truncated cache file: fall through and rebuild it

    result = compute(x)

    # Write to a temporary file and rename it into place, so a crash while
    # writing never leaves a half-written cache file behind.
    tmp_file = f'{cache_file}.{os.getpid()}.tmp'
    with open(tmp_file, 'wb') as f:
        pickle.dump(result, f)
    os.replace(tmp_file, cache_file)

    return result

# Send objects between processes
from multiprocessing import Queue
import pickle

def worker_process(queue):
    """Consume pickled tasks from ``queue`` until a ``None`` sentinel arrives.

    Each item taken from the queue is unpickled back into a task object;
    receiving ``None`` ends the loop.
    """
    while (pickled_task := queue.get()) is not None:
        task = pickle.loads(pickled_task)
        # Process task...

python code snippet end

Pickle is perfect for caching, inter-process communication, and object persistence - just remember the golden rule: only unpickle data you trust! Pickle pairs well with multiprocessing communication and database BLOB storage. When the data may be untrusted, prefer JSON for simple data, and consider pathlib for managing the files you write.

Reference: pickle — Python object serialization