Skip to main content Brad's PyNotes

Struct Module: Binary Data Processing and C Integration

TL;DR

The struct module converts between Python values and C structs represented as bytes objects, enabling binary data processing for network protocols, file formats, and hardware communication with precise control over byte layout.

Interesting!

Struct format strings can specify exact byte order and alignment - you can pack data as little-endian, big-endian, or native format, making it perfect for cross-platform binary protocols and file format parsing.

Basic Pack and Unpack

Converting Python Values to Binary

python code snippet start

import struct

# Pack an integer and float (native byte order, size and alignment)
data = struct.pack('if', 42, 3.14)
print(data)  # b'*\x00\x00\x00\xc3\xf5H@' (on a little-endian machine)

# Unpack binary data back to Python values
values = struct.unpack('if', data)
# 'f' stores a 32-bit float, so 3.14 comes back rounded to single precision
print(values)  # (42, 3.140000104904175)

python code snippet end

Calculate Size of Packed Data

python code snippet start

# How many bytes will this format use?
size = struct.calcsize('if')  # int + float
print(size)  # 8 bytes (4 + 4) on platforms where C int and float are 4 bytes

# Useful for reading exact amounts from files
with open('data.bin', 'rb') as f:
    chunk = f.read(struct.calcsize('3i'))  # Read 3 integers
    # struct.unpack raises struct.error if the file held fewer bytes
    # than the format requires
    a, b, c = struct.unpack('3i', chunk)

python code snippet end

Format String Components

Byte Order and Alignment

python code snippet start

# Native byte order, native sizes and native alignment (the default)
struct.pack('@i', 42)  # Platform-specific

# Little-endian (Intel/x86), standard sizes, no padding
struct.pack('<i', 42)  # b'*\x00\x00\x00'

# Big-endian (Network byte order), standard sizes, no padding
struct.pack('>i', 42)  # b'\x00\x00\x00*'

# Native byte order, but standard sizes and no alignment padding
struct.pack('=i', 42)  # Native order, no padding

python code snippet end

Common Format Characters

python code snippet start

# Integer types
# Sizes below are the standard sizes; without a prefix the format uses
# native mode, where sizes follow the platform's C compiler.
# Values outside a type's range raise struct.error.
struct.pack('b', -128)     # signed char (1 byte)
struct.pack('B', 255)      # unsigned char (1 byte)
struct.pack('h', -32768)   # short (2 bytes)
struct.pack('H', 65535)    # unsigned short (2 bytes)
struct.pack('i', -2**31)   # int (4 bytes)
struct.pack('I', 2**32-1)  # unsigned int (4 bytes)
struct.pack('q', -2**63)   # long long (8 bytes)

# Floating point
struct.pack('f', 3.14)     # float (4 bytes)
struct.pack('d', 3.14)     # double (8 bytes)

# Strings and bytes
struct.pack('10s', b'hello')    # 10-byte field; shorter values are padded with null bytes
struct.pack('c', b'A')          # single character (a bytes object of length 1)

python code snippet end

Network Protocol Example

Length-Prefixed Message Framing

python code snippet start

import struct
import socket

# Simple packet format: [length][type][data]
def pack_message(msg_type: int, data: bytes) -> bytes:
    """Frame *data* behind a 6-byte big-endian header.

    The header holds the payload length as a 4-byte unsigned integer
    followed by *msg_type* as a 2-byte unsigned integer; the payload is
    appended unchanged.
    """
    payload_size = len(data)
    # '>IH': network byte order, uint32 length then uint16 type
    prefix = struct.pack('>IH', payload_size, msg_type)
    return prefix + data

def unpack_message(packet: bytes) -> tuple[int, bytes]:
    """Split a framed packet into its message type and payload.

    The packet starts with a 6-byte big-endian header: a 4-byte unsigned
    payload length followed by a 2-byte unsigned message type. Bytes
    after the declared payload are ignored.

    Returns:
        A ``(msg_type, data)`` tuple.

    Raises:
        struct.error: if *packet* is shorter than the 6-byte header.
        ValueError: if *packet* holds fewer payload bytes than the
            header's length field declares.
    """
    header_size = struct.calcsize('>IH')
    # unpack_from reads the header in place and raises struct.error when
    # the buffer is too small to contain it.
    length, msg_type = struct.unpack_from('>IH', packet)
    data = packet[header_size:header_size + length]
    # Slicing never fails, so a truncated packet would otherwise be
    # returned as if it were complete.
    if len(data) != length:
        raise ValueError(
            f"Truncated packet: header declares {length} payload bytes, "
            f"got {len(data)}"
        )
    return msg_type, data

# Usage
message = pack_message(1, b'Hello, network!')
print(f"Packed: {message}")  # Packed: b'\x00\x00\x00\x0f\x00\x01Hello, network!'

msg_type, data = unpack_message(message)
print(f"Type: {msg_type}, Data: {data}")  # Type: 1, Data: b'Hello, network!'

python code snippet end

Binary File Format Processing

Reading Image Headers

python code snippet start

import struct

def read_bmp_header(filename: str) -> dict:
    """Parse the 14-byte file header at the start of a BMP file.

    Returns a dict with the raw 2-byte ``'signature'``, the
    ``'file_size'`` field and the ``'data_offset'`` field. The two
    reserved fields are read but not returned.
    """
    with open(filename, 'rb') as bmp_file:
        raw_header = bmp_file.read(14)

    # Little-endian: 2-byte signature, uint32 size, two uint16 reserved
    # fields, uint32 offset
    fields = struct.unpack('<2sIHHI', raw_header)
    magic, total_size, _reserved_a, _reserved_b, data_start = fields

    result = {}
    result['signature'] = magic
    result['file_size'] = total_size
    result['data_offset'] = data_start
    return result

# Usage (would work with actual BMP file)
# info = read_bmp_header('image.bmp')
# print(f"File size: {info['file_size']} bytes")

python code snippet end

Creating Custom Binary Format

python code snippet start

import struct
from datetime import datetime

class BinaryLogEntry:
    """Custom binary log format.

    Each entry is a 16-byte big-endian header (8-byte float timestamp,
    4-byte unsigned level, 4-byte unsigned message length) followed by
    the UTF-8 encoded message.
    """

    @staticmethod
    def pack(timestamp: float, level: int, message: str) -> bytes:
        """Pack log entry to binary format.

        The stored length is the size of the UTF-8 encoding in bytes,
        which can exceed ``len(message)`` for non-ASCII text.
        """
        msg_bytes = message.encode('utf-8')
        msg_len = len(msg_bytes)

        # Format: timestamp(8) + level(4) + msg_len(4) + message
        header = struct.pack('>dII', timestamp, level, msg_len)
        return header + msg_bytes

    @staticmethod
    def unpack(data: bytes) -> tuple[float, int, str]:
        """Unpack binary log entry.

        Bytes after the declared message are ignored.

        Returns:
            A ``(timestamp, level, message)`` tuple.

        Raises:
            struct.error: if *data* is shorter than the 16-byte header.
            ValueError: if *data* holds fewer message bytes than the
                header declares, or the message is not valid UTF-8
                (``UnicodeDecodeError`` is a ``ValueError`` subclass).
        """
        header_size = struct.calcsize('>dII')
        timestamp, level, msg_len = struct.unpack_from('>dII', data)
        msg_bytes = data[header_size:header_size + msg_len]
        # Slicing never fails, so check explicitly instead of decoding a
        # message that was cut short.
        if len(msg_bytes) != msg_len:
            raise ValueError(
                f"Truncated log entry: header declares {msg_len} message "
                f"bytes, got {len(msg_bytes)}"
            )
        return timestamp, level, msg_bytes.decode('utf-8')

# Usage
# datetime.now().timestamp() supplies the float that pack() stores as the timestamp
entry = BinaryLogEntry.pack(datetime.now().timestamp(), 2, "Error occurred")
timestamp, level, message = BinaryLogEntry.unpack(entry)
print(f"Level {level}: {message}")  # Level 2: Error occurred

python code snippet end

Hardware Communication

Serial Device Protocol

python code snippet start

import struct

def create_sensor_command(sensor_id: int, command: int, value: float) -> bytes:
    """Build an 8-byte command packet for a sensor device.

    Layout: start byte 0xFF, sensor id, command (one unsigned byte
    each), a big-endian 32-bit float value, then a one-byte checksum
    equal to the low 8 bits of the sum of the preceding seven bytes.
    """
    frame = bytearray(struct.pack('>BBBf', 0xFF, sensor_id, command, value))
    # The checksum covers everything packed so far, start byte included
    frame.append(sum(frame) & 0xFF)
    return bytes(frame)

def parse_sensor_response(data: bytes) -> dict:
    """Parse an 8-byte sensor response packet.

    Layout: start byte, sensor id, status (one unsigned byte each), a
    big-endian 32-bit float value, then a one-byte checksum. Bytes past
    the first eight are ignored.

    NOTE(review): the checksum is assumed to use the same algorithm as
    command packets (low 8 bits of the sum of the first seven bytes);
    confirm against the device specification.

    Returns:
        A dict with ``'sensor_id'``, ``'status'``, ``'value'`` and
        ``'valid'``. ``'valid'`` is True only when the start byte is
        0xFF and the checksum byte matches.

    Raises:
        ValueError: if *data* is shorter than 8 bytes.
    """
    if len(data) < 8:
        raise ValueError("Packet too short")

    # Unpack response: [start][sensor_id][status][value][checksum]
    start, sensor_id, status, value, checksum = struct.unpack_from(
        '>BBBfB', data
    )

    # The 8th byte was required by the length check but never examined
    # before; verify it so corrupted packets are not reported as valid.
    expected_checksum = sum(data[:7]) & 0xFF

    return {
        'sensor_id': sensor_id,
        'status': status,
        'value': value,
        'valid': start == 0xFF and checksum == expected_checksum
    }

python code snippet end

Performance Tips

Struct Objects for Repeated Operations

python code snippet start

import struct

# Create reusable Struct object (the format string is compiled once)
packet_format = struct.Struct('>IHH')  # length + type + flags

# More efficient for repeated operations: no per-call format lookup.
# The module-level functions cache recently used formats, so the gain
# matters mostly in hot loops or when many different formats are used.
for i in range(1000):
    data = packet_format.pack(100, 1, 0x8000)
    length, msg_type, flags = packet_format.unpack(data)

python code snippet end

Use struct for any binary data processing - it’s the foundation of network protocols, file formats, and system integration in Python.

Reference: Python Struct Documentation