CSV Module: Easy CSV File Reading and Writing
TL;DR
The csv module provides csv.reader(), csv.writer(), csv.DictReader(), and csv.DictWriter() for robust CSV file processing, with optional dialect detection via csv.Sniffer() and proper handling of quotes, delimiters, and line endings.
Interesting!
The csv module can automatically detect CSV dialects (delimiter, quote style, etc.) from sample data, and it properly handles tricky cases like embedded commas, quotes, and newlines within CSV fields that would break naive string splitting.
Basic CSV Reading
python code snippet start
import csv

# Reading with csv.reader(): each row comes back as a list of strings.
with open('data.csv', 'r') as file:
    reader = csv.reader(file)

    # Skip header row (next() advances the reader past line 1)
    headers = next(reader)
    print(f"Headers: {headers}")

    # Process each row
    for row in reader:
        print(row)  # Each row is a list
        print(f"Name: {row[0]}, Age: {row[1]}")

# Sample data.csv:
# Name,Age,City
# Alice,25,New York
# Bob,30,London
python code snippet end
DictReader for Named Access
python code snippet start
import csv

# Reading with DictReader (recommended): rows are dicts keyed by header names.
with open('employees.csv', 'r') as file:
    reader = csv.DictReader(file)

    for row in reader:
        # Each row is a dictionary
        print(f"Name: {row['name']}")
        print(f"Department: {row['department']}")
        print(f"Salary: ${row['salary']}")
        print("---")

# Alternative: specify fieldnames explicitly (useful for headerless files)
with open('data.csv', 'r') as file:
    reader = csv.DictReader(file, fieldnames=['name', 'age', 'city'])
    next(reader)  # Skip header if present; without this the header is read as data
    for row in reader:
        print(row['name'], row['age'])
python code snippet end
Writing CSV Files
python code snippet start
import csv
from typing import NamedTuple

# Writing with csv.writer()
data = [
    ['Name', 'Age', 'City'],
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'London'],
    ['Charlie', 35, 'Tokyo'],
]

with open('output.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    # Write header
    writer.writerow(data[0])
    # Write data rows in one call
    writer.writerows(data[1:])


# Writing individual rows from record objects.
# (The original snippet referenced an undefined `people_data`; define
# sample records so the example actually runs.)
class Person(NamedTuple):
    name: str
    age: int
    city: str


people_data = [
    Person('Alice', 25, 'New York'),
    Person('Bob', 30, 'London'),
]

with open('people.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for person in people_data:
        writer.writerow([person.name, person.age, person.city])
python code snippet end
DictWriter for Dictionary Data
python code snippet start
import csv

# Writing with DictWriter (recommended): rows are dicts keyed by fieldnames.
employees = [
    {'name': 'Alice', 'department': 'Engineering', 'salary': 75000},
    {'name': 'Bob', 'department': 'Sales', 'salary': 65000},
    {'name': 'Charlie', 'department': 'Marketing', 'salary': 70000},
]

with open('employees.csv', 'w', newline='') as file:
    fieldnames = ['name', 'department', 'salary']
    writer = csv.DictWriter(file, fieldnames=fieldnames)

    # Write header (DictWriter does not do this automatically)
    writer.writeheader()

    # Write data
    for employee in employees:
        writer.writerow(employee)

    # Or write all at once:
    # writer.writerows(employees)
python code snippet end
Custom Dialects and Formatting
python code snippet start
import csv


# Custom dialect: a pipe-delimited variant of the standard Excel dialect.
class MyDialect(csv.excel):
    delimiter = '|'
    quotechar = '"'
    doublequote = True           # escape embedded quotes by doubling them
    skipinitialspace = True      # ignore whitespace right after the delimiter
    lineterminator = '\n'
    quoting = csv.QUOTE_MINIMAL  # quote only fields that need it


# Register custom dialect under a short name usable in reader()/writer()
csv.register_dialect('pipes', MyDialect)

# Use custom dialect for writing
with open('pipe_data.csv', 'w', newline='') as file:
    writer = csv.writer(file, dialect='pipes')
    writer.writerow(['Name', 'Age', 'City'])
    writer.writerow(['Alice', 25, 'New York'])

# Reading with the same custom dialect
with open('pipe_data.csv', 'r') as file:
    reader = csv.reader(file, dialect='pipes')
    for row in reader:
        print(row)
python code snippet end
Handling Different Formats
python code snippet start
import csv

# Tab-separated values: same API, just a different delimiter
with open('data.tsv', 'w', newline='') as file:
    writer = csv.writer(file, delimiter='\t')
    writer.writerow(['Name', 'Score', 'Grade'])
    writer.writerow(['Alice', 95, 'A'])

# Custom quote handling
with open('quotes.csv', 'w', newline='') as file:
    writer = csv.writer(file, quoting=csv.QUOTE_ALL)  # Quote everything
    writer.writerow(['Name', 'Comment'])
    writer.writerow(['Bob', 'Said "Hello, World!"'])

# Different quoting options:
# csv.QUOTE_MINIMAL    - Only when necessary (default)
# csv.QUOTE_ALL        - Quote all fields
# csv.QUOTE_NONNUMERIC - Quote non-numeric fields
# csv.QUOTE_NONE       - Never quote (escape instead)
python code snippet end
Practical Data Processing
Sales Data Analysis
python code snippet start
import csv
from collections import defaultdict


def analyze_sales_data(filename):
    """Aggregate sales figures from a CSV file.

    Args:
        filename: Path to a CSV file with columns region,product,quantity,price.

    Returns:
        Tuple of two plain dicts: (revenue per region, total quantity per
        product), where revenue is quantity * price summed per region.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if quantity/price are not numeric.
    """
    sales_by_region = defaultdict(float)
    product_totals = defaultdict(int)

    with open(filename, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            region = row['region']
            product = row['product']
            quantity = int(row['quantity'])
            price = float(row['price'])

            sales_by_region[region] += quantity * price
            product_totals[product] += quantity

    # Convert defaultdicts to plain dicts so callers don't get
    # silent-zero behavior on missing keys.
    return dict(sales_by_region), dict(product_totals)


# Usage (guarded so importing this module doesn't require sales.csv to exist)
# sales.csv format: region,product,quantity,price
if __name__ == '__main__':
    sales_by_region, product_totals = analyze_sales_data('sales.csv')
    print("Sales by region:", sales_by_region)
    print("Product quantities:", product_totals)
python code snippet end
Data Cleaning and Transformation
python code snippet start
import csv


def clean_customer_data(input_file, output_file):
    """Clean and standardize customer data.

    Reads rows with columns id,full_name,email_address,phone_number,city
    from ``input_file`` and writes normalized rows (customer_id, name,
    email, phone, city) to ``output_file``. Rows failing validation
    (no '@' in email, or fewer than 10 phone digits) are skipped with a
    message rather than written.
    """
    with open(input_file, 'r') as infile, \
            open(output_file, 'w', newline='') as outfile:
        reader = csv.DictReader(infile)

        # Define output fields (renamed/normalized versions of the input)
        fieldnames = ['customer_id', 'name', 'email', 'phone', 'city']
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # Clean and transform data
            cleaned_row = {
                'customer_id': row['id'].strip(),
                'name': row['full_name'].title().strip(),
                'email': row['email_address'].lower().strip(),
                # Keep digits only: "(555) 123-4567" -> "5551234567"
                'phone': ''.join(filter(str.isdigit, row['phone_number'])),
                'city': row['city'].title().strip(),
            }

            # Validation: plausible email and at least 10 phone digits
            if '@' in cleaned_row['email'] and len(cleaned_row['phone']) >= 10:
                writer.writerow(cleaned_row)
            else:
                print(f"Skipped invalid row: {row['id']}")


# Usage (guarded so importing doesn't require raw_customers.csv to exist)
if __name__ == '__main__':
    clean_customer_data('raw_customers.csv', 'clean_customers.csv')
python code snippet end
CSV to JSON Conversion
python code snippet start
import csv
import json
def csv_to_json(csv_file, json_file):
"""Convert CSV file to JSON"""
data = []
with open(csv_file, 'r') as file:
reader = csv.DictReader(file)
for row in reader:
# Convert numeric strings to numbers
for key, value in row.items():
if value.isdigit():
row[key] = int(value)
elif value.replace('.', '').isdigit():
row[key] = float(value)
data.append(row)
with open(json_file, 'w') as file:
json.dump(data, file, indent=2)
# Usage
csv_to_json('products.csv', 'products.json')
python code snippet end
Error Handling and Edge Cases
python code snippet start
import csv


def robust_csv_reader(filename):
    """Read a CSV file defensively, returning a list of row dicts.

    Sniffs the dialect from a 1 KiB sample (falling back to the default
    Excel dialect if sniffing fails), skips rows missing the required
    'name'/'email' fields with a warning, and returns [] when the file
    is missing or not valid UTF-8.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            # Detect dialect automatically from the first KiB
            sample = file.read(1024)
            file.seek(0)
            try:
                dialect = csv.Sniffer().sniff(sample)
                reader = csv.DictReader(file, dialect=dialect)
            except csv.Error:
                # Fall back to default dialect
                reader = csv.DictReader(file)

            rows = []
            # start=2: the header row occupies line 1 of the file, so the
            # first data row is physical line 2 (the original started at 1,
            # which mislabeled every warning).
            for line_num, row in enumerate(reader, 2):
                # Validate required fields
                if not row.get('name') or not row.get('email'):
                    print(f"Warning: Missing data in line {line_num}")
                    continue
                rows.append(row)
            return rows
    except FileNotFoundError:
        # Original printed the literal "(unknown)" instead of the filename
        print(f"File {filename} not found")
        return []
    except UnicodeDecodeError:
        print(f"Encoding error in {filename}")
        return []


# Usage (guarded so importing doesn't require messy_data.csv to exist)
if __name__ == '__main__':
    data = robust_csv_reader('messy_data.csv')
python code snippet end
Performance Tips
python code snippet start
import csv


# For large files, process row by row (memory efficient)
def process_large_csv(filename):
    """Yield process_row(row) for each row of `filename`, streaming.

    Rows are consumed lazily, so the whole file is never held in memory.
    NOTE(review): relies on a `process_row` callable defined elsewhere —
    this snippet is a template, not runnable standalone.
    """
    with open(filename, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            # Process one row at a time; don't load the entire file
            yield process_row(row)


# Batch processing for database inserts
def batch_insert_csv(filename, batch_size=1000):
    """Insert CSV data in batches of at most `batch_size` rows.

    NOTE(review): relies on an `insert_batch` callable defined elsewhere —
    this snippet is a template, not runnable standalone.
    """
    batch = []
    with open(filename, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            batch.append(row)
            if len(batch) >= batch_size:
                # Flush a full batch to the database
                insert_batch(batch)
                batch = []

    # Insert remaining rows (the last, possibly short, batch)
    if batch:
        insert_batch(batch)
python code snippet end
Best Practices
python code snippet start
# Always use newline='' when writing CSV files
with open('data.csv', 'w', newline='') as file:
writer = csv.writer(file)
# Use DictReader/DictWriter for better readability
# Good
with open('data.csv', 'r') as file:
reader = csv.DictReader(file)
for row in reader:
print(row['name'])
# Less readable
with open('data.csv', 'r') as file:
reader = csv.reader(file)
for row in reader:
print(row[0]) # Which column is this?
# Handle encoding explicitly
with open('data.csv', 'r', encoding='utf-8') as file:
reader = csv.DictReader(file)
# Validate data types
def safe_int(value, default=0):
try:
return int(value)
except (ValueError, TypeError):
return default
age = safe_int(row['age'])
python code snippet end
The csv module makes working with tabular data straightforward while handling the complexities of CSV format edge cases automatically.
CSV operations integrate seamlessly with pathlib for file handling and JSON conversion for comprehensive data processing workflows.
Reference: Python CSV Module Documentation