Struct Module: Binary Data Processing and C Integration
TL;DR
The struct module converts between Python values and C structs represented as bytes objects, enabling binary data processing for network protocols, file formats, and hardware communication with precise control over byte layout.
Interesting!
Struct format strings can specify exact byte order and alignment - you can pack data as little-endian, big-endian, or native format, making it perfect for cross-platform binary protocols and file format parsing.
Basic Pack and Unpack
Converting Python Values to Binary
python code snippet start
import struct
# Pack an integer and float (no prefix = native byte order, size and alignment)
data = struct.pack('if', 42, 3.14)
print(data) # b'*\x00\x00\x00\xc3\xf5H@' (as shown on a little-endian machine)
# Unpack binary data back to Python values
values = struct.unpack('if', data)
# 'f' stores a 32-bit float, so 3.14 comes back as the nearest representable value
print(values) # (42, 3.140000104904175)
python code snippet end
Calculate Size of Packed Data
python code snippet start
# calcsize() reports how many bytes a format occupies once packed
size = struct.calcsize('if')  # int + float
print(size)  # 8 bytes (4 + 4)

# Handy for pulling exactly one record's worth of bytes out of a file
with open('data.bin', 'rb') as f:
    chunk = f.read(struct.calcsize('3i'))  # room for 3 integers

# The same format string decodes the bytes that were just read
a, b, c = struct.unpack('3i', chunk)
python code snippet end
Format String Components
Byte Order and Alignment
python code snippet start
# Native byte order and alignment (also native C type sizes; this is the
# default when the format string has no prefix)
struct.pack('@i', 42) # Platform-specific
# Little-endian (Intel/x86); standard sizes, no alignment padding
struct.pack('<i', 42) # b'*\x00\x00\x00'
# Big-endian (Network byte order; '!' is an equivalent prefix)
struct.pack('>i', 42) # b'\x00\x00\x00*'
# No alignment (packed): native byte order, but standard sizes and no padding
struct.pack('=i', 42) # Native order, no padding
python code snippet end
Common Format Characters
python code snippet start
# Integer types
# The values below are the limits of each type's range; packing a value
# outside the range raises struct.error.
# Byte counts are the standard sizes. With no prefix the platform's native C
# sizes are used instead, which match these on most common platforms.
struct.pack('b', -128) # signed char (1 byte)
struct.pack('B', 255) # unsigned char (1 byte)
struct.pack('h', -32768) # short (2 bytes)
struct.pack('H', 65535) # unsigned short (2 bytes)
struct.pack('i', -2**31) # int (4 bytes)
struct.pack('I', 2**32-1) # unsigned int (4 bytes)
struct.pack('q', -2**63) # long long (8 bytes)
# Floating point
struct.pack('f', 3.14) # float (4 bytes)
struct.pack('d', 3.14) # double (8 bytes)
# Strings and bytes
# For 's' the count is the field length in bytes, not a repeat count;
# input shorter than the field is padded with null bytes.
struct.pack('10s', b'hello') # 10-byte string/bytes
# 'c' takes a bytes object of length 1, not a str
struct.pack('c', b'A') # single character
python code snippet end
Network Protocol Example
Length-Prefixed Message Framing
python code snippet start
import struct
import socket
# Simple packet format: [length][type][data]
def pack_message(msg_type: int, data: bytes) -> bytes:
    """Frame *data* behind a fixed 6-byte header.

    The header is written in network (big-endian) byte order: a 4-byte
    unsigned payload length followed by a 2-byte unsigned message type.
    The payload bytes are appended unchanged.
    """
    # '>' = big-endian, 'I' = payload length, 'H' = message type
    return struct.pack('>IH', len(data), msg_type) + data
def unpack_message(packet: bytes) -> tuple[int, bytes]:
    """Split a framed packet into its message type and payload.

    The packet must start with the 6-byte big-endian header
    (4-byte unsigned payload length, 2-byte unsigned message type).
    Bytes after the declared payload are ignored, so a buffer holding
    several back-to-back packets can be passed in.

    Returns:
        A ``(msg_type, data)`` tuple.

    Raises:
        struct.error: If *packet* is shorter than the header.
        ValueError: If *packet* holds fewer payload bytes than the header
            declares (a truncated or partially received packet).
    """
    header_format = '>IH'
    header_size = struct.calcsize(header_format)
    # unpack_from reads the header in place and raises struct.error when the
    # buffer is too small to contain one.
    length, msg_type = struct.unpack_from(header_format, packet)
    payload_end = header_size + length
    # Slicing never fails on a short buffer, so check explicitly instead of
    # silently handing back a truncated payload.
    if len(packet) < payload_end:
        raise ValueError(
            f"Truncated packet: header declares {length} payload bytes, "
            f"got {len(packet) - header_size}"
        )
    return msg_type, packet[header_size:payload_end]
# Usage
# Frame a payload as message type 1; the result is the 6-byte header
# followed by the payload bytes.
message = pack_message(1, b'Hello, network!')
print(f"Packed: {message}")
# Round-trip: parse the packed bytes back into type and payload
msg_type, data = unpack_message(message)
print(f"Type: {msg_type}, Data: {data}")
python code snippet end
Binary File Format Processing
Reading Image Headers
python code snippet start
import struct
def read_bmp_header(filename: str) -> dict:
    """Read the 14-byte BMP file header from *filename*.

    Returns:
        A dict with ``'signature'`` (the first two bytes, as ``bytes``),
        ``'file_size'`` and ``'data_offset'`` (both unsigned ints decoded
        as little-endian).

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the file is shorter than 14 bytes.
    """
    with open(filename, 'rb') as f:
        # BMP file header (14 bytes)
        header = f.read(14)
    # '<' = little-endian: 2-byte signature, 4-byte file size, 4 pad bytes
    # ('4x') that skip the two reserved 16-bit fields, 4-byte data offset.
    signature, file_size, offset = struct.unpack('<2sI4xI', header)
    return {
        'signature': signature,
        'file_size': file_size,
        'data_offset': offset,
    }
# Usage (would work with actual BMP file)
# info = read_bmp_header('image.bmp')
# print(f"File size: {info['file_size']} bytes")
python code snippet end
Creating Custom Binary Format
python code snippet start
import struct
from datetime import datetime
class BinaryLogEntry:
    """Custom binary log format.

    Each entry is a 16-byte big-endian header followed by the message:
    timestamp (8-byte double), level (4-byte unsigned int), message
    length in bytes (4-byte unsigned int), then the UTF-8 encoded message.
    """

    # Compiled once and shared so pack() and unpack() cannot drift apart.
    _HEADER = struct.Struct('>dII')

    @staticmethod
    def pack(timestamp: float, level: int, message: str) -> bytes:
        """Pack log entry to binary format.

        The stored length is the number of encoded bytes, not the number
        of characters, so non-ASCII messages round-trip correctly.
        """
        msg_bytes = message.encode('utf-8')
        header = BinaryLogEntry._HEADER.pack(timestamp, level, len(msg_bytes))
        return header + msg_bytes

    @staticmethod
    def unpack(data: bytes) -> tuple[float, int, str]:
        """Unpack binary log entry.

        Bytes after the declared message are ignored.

        Returns:
            A ``(timestamp, level, message)`` tuple.

        Raises:
            struct.error: If *data* is shorter than the 16-byte header.
            ValueError: If *data* holds fewer message bytes than the header
                declares, or the message bytes are not valid UTF-8
                (``UnicodeDecodeError`` is a ``ValueError`` subclass).
        """
        header = BinaryLogEntry._HEADER
        timestamp, level, msg_len = header.unpack_from(data)
        message_end = header.size + msg_len
        # A slice past the end would quietly yield a shortened message, so
        # reject truncated entries explicitly.
        if len(data) < message_end:
            raise ValueError(
                f"Truncated log entry: header declares {msg_len} message "
                f"bytes, got {len(data) - header.size}"
            )
        message = data[header.size:message_end].decode('utf-8')
        return timestamp, level, message
# Usage
# Pack the current time as a float timestamp, with level 2 and a short message
entry = BinaryLogEntry.pack(datetime.now().timestamp(), 2, "Error occurred")
# Round-trip the packed bytes back into their three fields
timestamp, level, message = BinaryLogEntry.unpack(entry)
print(f"Level {level}: {message}")
python code snippet end
Hardware Communication
Serial Device Protocol
python code snippet start
import struct
def create_sensor_command(sensor_id: int, command: int, value: float) -> bytes:
    """Build the 8-byte command packet sent to a sensor device.

    Layout: start byte 0xFF, sensor id (1 byte), command (1 byte),
    value (big-endian 32-bit float), then a 1-byte checksum equal to the
    sum of the preceding seven bytes modulo 256.
    """
    # Everything except the trailing checksum
    payload = struct.pack('>BBBf', 0xFF, sensor_id, command, value)
    # Additive checksum, reduced to a single byte
    checksum = sum(payload) % 256
    return payload + bytes((checksum,))
def parse_sensor_response(data: bytes) -> dict:
    """Parse sensor response packet.

    Expected layout (8 bytes): start byte, sensor id, status,
    value (big-endian 32-bit float), then a 1-byte checksum equal to the
    sum of the first seven bytes modulo 256. Bytes beyond the eighth are
    ignored.

    Returns:
        A dict with ``'sensor_id'``, ``'status'``, ``'value'`` and
        ``'valid'``. ``'valid'`` is True only when the start byte is 0xFF
        and the checksum byte matches the packet contents.

    Raises:
        ValueError: If *data* is shorter than 8 bytes.
    """
    if len(data) < 8:
        raise ValueError("Packet too short")
    # Unpack response: [start][sensor_id][status][value]
    start, sensor_id, status, value = struct.unpack('>BBBf', data[:7])
    # The 8th byte is required by the length check above, so it must be
    # verified; otherwise a corrupted packet would be reported as valid.
    checksum_ok = data[7] == (sum(data[:7]) & 0xFF)
    return {
        'sensor_id': sensor_id,
        'status': status,
        'value': value,
        'valid': start == 0xFF and checksum_ok,
    }
python code snippet end
Performance Tips
Struct Objects for Repeated Operations
python code snippet start
import struct
# Create reusable Struct object: the format string is compiled once here
packet_format = struct.Struct('>IHH') # length + type + flags
# Reusing the Struct avoids a format lookup on every call. The module-level
# functions cache recently used formats, so expect a modest gain, not a
# dramatic one.
for _ in range(1000):
    data = packet_format.pack(100, 1, 0x8000)
    length, msg_type, flags = packet_format.unpack(data)
python code snippet end
Use struct for any binary data processing - it’s the foundation of network protocols, file formats, and system integration in Python.
Reference: Python Struct Documentation