Modules Tutorial: Organizing and Reusing Code With Python Modules
TL;DR
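Python modules are files containing Python code that can be imported and reused. They can be organized into packages (directories containing an `__init__.py` file) and brought into scope with several import styles (`import`, `from … import`, `as`), while special attributes such as `__name__` and `__all__` control how a module behaves when it is run or imported.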
Interesting!
Python’s module system uses the `__name__` variable to tell whether a file is being run directly (`__name__ == "__main__"`) or imported as a module. A simple `if __name__ == "__main__":` guard therefore lets one file act both as a standalone script and as an importable library.
What are Modules?
python code snippet start
# A module is simply a .py file containing Python code
# Let's create a simple module called 'calculator.py'
# calculator.py
def add(a, b):
    """Return the sum of ``a`` and ``b``."""
    return a + b


def subtract(a, b):
    """Return ``a`` minus ``b``."""
    return a - b


def multiply(a, b):
    """Return the product of ``a`` and ``b``."""
    return a * b


def divide(a, b):
    """Return ``a`` divided by ``b`` using true division.

    Raises:
        ValueError: If ``b`` equals zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero")
    return a / b


# Module-level variable
PI = 3.14159

# Module-level code (runs when imported)
print("Calculator module loaded!")
python code snippet end
Basic Module Importing
python code snippet start
# Different ways to import modules. Each numbered form below stands on its own;
# they are listed together only for comparison.
# 1. Import entire module: names are reached through the module object
import calculator
result = calculator.add(5, 3)
print(f"5 + 3 = {result}")
print(f"PI = {calculator.PI}")
# 2. Import specific functions: the names are bound directly in this namespace
from calculator import add, subtract
result = add(10, 5)
difference = subtract(10, 5)
# 3. Import with alias: the module is bound to a shorter local name
import calculator as calc
result = calc.multiply(4, 6)
# 4. Import specific function with alias
from calculator import divide as div
result = div(20, 4)
# 5. Import everything (use sparingly): it hides where each name came from
from calculator import *
result = add(1, 2) # Can use directly, no module prefix needed
# 6. Import multiple items (functions and module-level variables alike)
from calculator import add, subtract, PI
python code snippet end
The `__name__` Variable
python code snippet start
# calculator.py (revised with __name__ check)
def add(a, b):
    """Return the sum of ``a`` and ``b``."""
    return a + b


def subtract(a, b):
    """Return ``a`` minus ``b``."""
    return a - b


PI = 3.14159


def main():
    """Main function for when script is run directly"""
    print("Calculator Demo")
    print(f"5 + 3 = {add(5, 3)}")
    print(f"10 - 4 = {subtract(10, 4)}")
    print(f"PI = {PI}")


# This runs only when file is executed directly
if __name__ == "__main__":
    main()
# When imported: __name__ will be "calculator"
# When run directly: __name__ will be "__main__"
python code snippet end
Module Search Path
python code snippet start
import sys

# Show where Python looks for modules
print("Module search path:")
for path in sys.path:
    print(f" {path}")

# Add custom directory to search path
sys.path.append("/path/to/my/modules")

# Check if module is available. Only the import sits inside ``try`` so that an
# unrelated failure cannot be mistaken for a missing module.
try:
    import my_custom_module
except ImportError:
    print("Custom module not found")
else:
    print("Custom module imported successfully")

# Show loaded modules. ``sorted`` builds its own list, so the loop does not
# iterate over ``sys.modules`` itself.
print("\nLoaded modules:")
for name in sorted(sys.modules):
    if not name.startswith('_'):
        print(f" {name}")
python code snippet end
Creating Packages
python code snippet start
# Package structure:
# mypackage/
#     __init__.py
#     core.py
#     utils.py
#     subpackage/
#         __init__.py
#         advanced.py
# mypackage/__init__.py
"""
MyPackage - A demo package for calculations
"""
# Package-level imports: re-export the main class and helper so callers can
# write `from mypackage import Calculator, format_result`
from .core import Calculator
from .utils import format_result
# Package version
__version__ = "1.0.0"
# What gets imported with "from mypackage import *"
__all__ = ['Calculator', 'format_result']
# Package-level variable
PACKAGE_NAME = "MyPackage"
# Top-level statement: prints a load banner when this __init__.py is executed
print(f"{PACKAGE_NAME} v{__version__} loaded")
# mypackage/core.py
class Calculator:
    """Main calculator class.

    Every operation performed is recorded as a readable string in
    ``history``.
    """

    def __init__(self):
        # Chronological log of the operations performed by this instance.
        self.history = []

    def add(self, a, b):
        """Return ``a + b`` and append the operation to the history."""
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        return result

    def get_history(self):
        """Return a copy of the history so callers cannot mutate the log."""
        return self.history.copy()
# mypackage/utils.py
def format_result(value, decimals=2):
    """Format calculation result as a fixed-point string.

    Args:
        value: Number to format.
        decimals: Number of digits after the decimal point.

    Returns:
        ``value`` rendered with exactly ``decimals`` fractional digits.
    """
    return f"{value:.{decimals}f}"
def validate_number(value):
    """Validate if value is a number.

    Returns:
        True when ``float(value)`` succeeds, False when it raises
        ``ValueError`` or ``TypeError``.
    """
    try:
        float(value)
    except (ValueError, TypeError):
        return False
    return True
# mypackage/subpackage/__init__.py
from .advanced import ScientificCalculator
# mypackage/subpackage/advanced.py
import math
from ..core import Calculator
class ScientificCalculator(Calculator):
    """Extended calculator with scientific functions.

    Each method delegates to ``math`` and appends a description of the call
    to ``self.history``.
    """

    def sin(self, x):
        """Return ``math.sin(x)`` and record the call."""
        result = math.sin(x)
        self.history.append(f"sin({x}) = {result}")
        return result

    def cos(self, x):
        """Return ``math.cos(x)`` and record the call."""
        result = math.cos(x)
        self.history.append(f"cos({x}) = {result}")
        return result

    def sqrt(self, x):
        """Return the square root of ``x`` and record the call.

        Raises:
            ValueError: If ``x`` is negative.
        """
        if x < 0:
            raise ValueError("Cannot take square root of negative number")
        result = math.sqrt(x)
        self.history.append(f"sqrt({x}) = {result}")
        return result
python code snippet end
Using Packages
python code snippet start
# Different ways to import from packages

# Import package (loads __init__.py)
import mypackage
calc = mypackage.Calculator()
result = mypackage.format_result(calc.add(5, 3))

# Import specific module from package
from mypackage import core
calc = core.Calculator()

# Import specific class/function
from mypackage import Calculator, format_result
calc = Calculator()
result = format_result(calc.add(10, 5))

# Import from subpackage
from mypackage.subpackage import ScientificCalculator
sci_calc = ScientificCalculator()

# Relative imports (within package files)
# They are resolved against the importing module's package, so they only work
# inside package files. Run from a top-level script like this one they raise
# ImportError, which is why they are shown here as comments.
# In mypackage/subpackage/advanced.py:
#     from ..core import Calculator  # Go up one level
#     from . import some_module      # Same level

# Check package information
print(f"Package version: {mypackage.__version__}")
print(f"Package contents: {dir(mypackage)}")
python code snippet end
Advanced Module Concepts
Module Attributes and Introspection
python code snippet start
# Examining modules
import math

# Module documentation
print(f"Math module doc: {math.__doc__}")

# Module file location. Modules compiled into the interpreter have no
# ``__file__`` attribute, and ``math`` is one of them on some builds, so fall
# back to a placeholder instead of raising AttributeError.
print(f"Math module file: {getattr(math, '__file__', '(built-in)')}")

# Module name
print(f"Module name: {math.__name__}")

# List all attributes
print("Math module contents:")
for attr in dir(math):
    if not attr.startswith('_'):
        obj = getattr(math, attr)
        print(f" {attr}: {type(obj).__name__}")

# Check if attribute exists
if hasattr(math, 'pi'):
    print(f"π = {math.pi}")

# Get attribute dynamically
function_name = 'sqrt'
if hasattr(math, function_name):
    sqrt_func = getattr(math, function_name)
    result = sqrt_func(16)
    print(f"sqrt(16) = {result}")
python code snippet end
Dynamic Imports
python code snippet start
import importlib
def load_module_dynamically(module_name):
    """Load a module dynamically at runtime.

    Args:
        module_name: Dotted module name passed to ``importlib.import_module``.

    Returns:
        The imported module, or ``None`` (after printing the error) when the
        import raises ``ImportError``.
    """
    try:
        return importlib.import_module(module_name)
    except ImportError as e:
        print(f"Failed to import {module_name}: {e}")
        return None
# ``sys`` is needed for the reload check at the bottom; this snippet only
# imported ``importlib``, so that check raised NameError.
import sys

# Load modules based on conditions
modules_to_load = ['math', 'random', 'json']
loaded_modules = {}
for module_name in modules_to_load:
    module = load_module_dynamically(module_name)
    if module:
        loaded_modules[module_name] = module
        print(f"Loaded {module_name}")

# Use dynamically loaded modules
if 'math' in loaded_modules:
    math_module = loaded_modules['math']
    print(f"π = {math_module.pi}")

# Reload a module (useful for development). Only a module that has already
# been imported can be reloaded, hence the membership test.
if 'calculator' in sys.modules:
    calculator = importlib.reload(sys.modules['calculator'])
python code snippet end
Module-Level Configuration
python code snippet start
# config.py - Configuration module
import os
from pathlib import Path
# Environment-based configuration
# DEBUG is on only when the variable is exactly "true" (case-insensitive).
DEBUG = os.getenv('DEBUG', 'False').lower() == 'true'
DATABASE_URL = os.getenv('DATABASE_URL', 'sqlite:///app.db')

# Path configuration (relative to the directory containing this file)
BASE_DIR = Path(__file__).parent
DATA_DIR = BASE_DIR / 'data'
LOG_DIR = BASE_DIR / 'logs'

# Create directories if they don't exist
DATA_DIR.mkdir(exist_ok=True)
LOG_DIR.mkdir(exist_ok=True)

# Application settings
SETTINGS = {
    'app_name': 'MyApp',
    'version': '1.0.0',
    'author': 'Your Name',
    'max_connections': 100,
    'timeout': 30
}


def get_setting(key, default=None):
    """Get a setting value, or ``default`` when ``key`` is not in SETTINGS."""
    return SETTINGS.get(key, default)


def update_setting(key, value):
    """Update (or add) a setting value in the module-level SETTINGS dict."""
    SETTINGS[key] = value
# Usage in other modules
# from config import DEBUG, DATABASE_URL, get_setting
python code snippet end
Real-World Module Examples
Utility Module
python code snippet start
# utils.py - Common utilities
import os
import json
import hashlib
from datetime import datetime
from typing import Any, Dict, List, Optional
def read_json_file(filepath: str) -> Optional[Dict]:
    """Read JSON file safely.

    Args:
        filepath: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` (after printing the error) when
        the file cannot be opened, is not valid UTF-8, or is not valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            return json.load(file)
    # OSError covers missing files, permission problems and directories;
    # UnicodeDecodeError covers files that are not valid UTF-8.
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error reading JSON file {filepath}: {e}")
        return None
def write_json_file(filepath: str, data: Dict) -> bool:
    """Write data to JSON file safely.

    Missing parent directories are created first.

    Args:
        filepath: Destination path; may be a bare filename.
        data: JSON-serializable object to write.

    Returns:
        True on success, False (after printing the error) when the file
        cannot be written or ``data`` cannot be serialized.
    """
    try:
        # A bare filename has an empty dirname, and os.makedirs('') raises,
        # so only create the parent when there is one.
        parent_dir = os.path.dirname(filepath)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=2, ensure_ascii=False)
        return True
    # TypeError: unserializable object; ValueError: circular reference.
    except (OSError, TypeError, ValueError) as e:
        print(f"Error writing JSON file {filepath}: {e}")
        return False
def calculate_file_hash(filepath: str, algorithm: str = 'sha256') -> Optional[str]:
    """Calculate hash of a file.

    The file is read in 4096-byte chunks so it never has to fit in memory.

    Args:
        filepath: Path of the file to hash.
        algorithm: Any algorithm name accepted by ``hashlib.new``.

    Returns:
        The hexadecimal digest, or ``None`` (after printing the error) when
        the file cannot be read or the algorithm cannot be used.
    """
    try:
        hash_obj = hashlib.new(algorithm)
        with open(filepath, 'rb') as file:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: file.read(4096), b""):
                hash_obj.update(chunk)
        return hash_obj.hexdigest()
    # OSError: missing or unreadable file, or a directory.
    # ValueError: unknown algorithm name.
    # TypeError: variable-length digests (e.g. shake_128) need a length.
    except (OSError, ValueError, TypeError) as e:
        print(f"Error calculating hash for {filepath}: {e}")
        return None
def timestamp() -> str:
    """Get current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
def chunks(lst: List[Any], chunk_size: int) -> List[List[Any]]:
    """Split list into chunks of specified size.

    Args:
        lst: Sequence to split.
        chunk_size: Maximum length of each chunk; must be at least 1.

    Returns:
        Consecutive slices of ``lst``; the last one may be shorter. An empty
        input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # Zero already failed inside range(); a negative size silently returned
    # [] and lost every element, so reject both explicitly.
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]
class Timer:
    """Context manager for timing operations.

    On exit it prints how long the ``with`` body took, in seconds with two
    decimals, prefixed by ``description``.
    """

    def __init__(self, description: str = "Operation"):
        self.description = description
        # Set on entry; None until the context has been entered.
        self.start_time = None

    def __enter__(self):
        self.start_time = datetime.now()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised in the body still propagates.
        end_time = datetime.now()
        duration = end_time - self.start_time
        print(f"{self.description} took {duration.total_seconds():.2f} seconds")
# Usage
# from utils import read_json_file, Timer
#
# with Timer("JSON processing"):
#     data = read_json_file("config.json")
python code snippet end
Data Processing Module
python code snippet start
# data_processor.py - Data processing utilities
import csv
import pandas as pd
from typing import Dict, List, Any, Optional
from pathlib import Path
class CSVProcessor:
    """Handle CSV file operations.

    Rows are kept in ``self.data`` as a list of dicts keyed by the CSV
    header, with every value left as the string read from the file.
    """

    def __init__(self, file_path: str):
        self.file_path = Path(file_path)
        # Filled by read(); write() does not touch it.
        self.data = []

    def read(self, encoding: str = 'utf-8') -> bool:
        """Read CSV file into ``self.data``.

        Returns:
            True on success, False (after printing the error) on failure.
        """
        try:
            # newline='' lets the csv module handle line endings itself, so
            # newlines embedded in quoted fields survive.
            with open(self.file_path, 'r', newline='', encoding=encoding) as file:
                reader = csv.DictReader(file)
                self.data = list(reader)
            return True
        # OSError: file problems; ValueError: undecodable bytes;
        # LookupError: unknown encoding name; csv.Error: malformed CSV.
        except (OSError, ValueError, LookupError, csv.Error) as e:
            print(f"Error reading CSV: {e}")
            return False

    def write(self, data: List[Dict], encoding: str = 'utf-8') -> bool:
        """Write data to CSV file.

        The header is taken from the keys of the first row.

        Returns:
            True on success; False when ``data`` is empty or writing fails
            (for example when a later row has keys missing from the header).
        """
        if not data:
            return False
        try:
            fieldnames = list(data[0].keys())
            with open(self.file_path, 'w', newline='', encoding=encoding) as file:
                writer = csv.DictWriter(file, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(data)
            return True
        except (OSError, ValueError, LookupError, AttributeError, csv.Error) as e:
            print(f"Error writing CSV: {e}")
            return False

    def filter_data(self, condition_func) -> List[Dict]:
        """Filter data based on condition function."""
        return [row for row in self.data if condition_func(row)]

    def get_column(self, column_name: str) -> List[Any]:
        """Get all values from a specific column, skipping rows without it."""
        return [row[column_name] for row in self.data if column_name in row]

    def summarize(self) -> Dict[str, Any]:
        """Get basic statistics about the data.

        Returns:
            An empty dict when no data is loaded; otherwise the row count,
            the column names of the first row, their count and the first row.
        """
        if not self.data:
            return {}
        first_row = self.data[0]
        columns = list(first_row.keys())
        return {
            'total_rows': len(self.data),
            'columns': columns,
            'column_count': len(columns),
            'sample_row': first_row
        }
# Advanced data processing functions
def clean_numeric_data(data: List[Dict], numeric_columns: List[str]) -> List[Dict]:
    """Clean numeric data in specified columns.

    Each listed column present in a row is converted to ``float`` after
    surrounding whitespace, thousands separators (``,``) and ``$`` signs are
    removed. Empty or unparseable values become ``0.0``. The input rows are
    copied, not modified.

    Args:
        data: Rows to clean.
        numeric_columns: Names of the columns to convert.

    Returns:
        A new list of cleaned row dicts.
    """
    cleaned_data = []
    for row in data:
        cleaned_row = row.copy()
        for column in numeric_columns:
            if column in cleaned_row:
                try:
                    # Convert to float and handle common issues
                    value = str(cleaned_row[column]).strip()
                    value = value.replace(',', '')  # Remove thousands separators
                    value = value.replace('$', '')  # Remove currency symbols
                    cleaned_row[column] = float(value) if value else 0.0
                except (ValueError, TypeError):
                    cleaned_row[column] = 0.0
        cleaned_data.append(cleaned_row)
    return cleaned_data
def aggregate_data(data: List[Dict], group_by: str, agg_column: str,
                   agg_func: str = 'sum') -> Dict[str, float]:
    """Aggregate data by grouping column.

    Args:
        data: Rows to aggregate.
        group_by: Column whose value selects the group; rows without it go
            to the ``'Unknown'`` group.
        agg_column: Column to aggregate; missing or non-numeric values count
            as ``0.0``.
        agg_func: One of ``'sum'``, ``'avg'``, ``'count'``, ``'max'``,
            ``'min'``.

    Returns:
        A mapping from group key to aggregated value.

    Raises:
        ValueError: If ``agg_func`` is not a supported name. This used to
            return an empty dict, which looked the same as having no data.
    """
    aggregators = {
        'sum': sum,
        'avg': lambda values: sum(values) / len(values),
        'count': len,
        'max': max,
        'min': min,
    }
    if agg_func not in aggregators:
        raise ValueError(
            f"Unsupported agg_func {agg_func!r}; expected one of {sorted(aggregators)}"
        )
    aggregate = aggregators[agg_func]

    groups = {}
    for row in data:
        key = row.get(group_by, 'Unknown')
        try:
            value = float(row.get(agg_column, 0))
        except (ValueError, TypeError):
            value = 0.0
        groups.setdefault(key, []).append(value)

    # A group exists only once it has received a value, so no list here is
    # empty and avg/max/min need no empty-input guard.
    return {key: aggregate(values) for key, values in groups.items()}
# Usage example
if __name__ == "__main__":
    # Process a CSV file
    processor = CSVProcessor("sales_data.csv")
    if processor.read():
        print("Data summary:", processor.summarize())

        # Filter high-value sales. Values read from CSV are strings, hence
        # the float() conversion.
        # NOTE(review): a non-numeric 'amount' makes float() raise ValueError;
        # run clean_numeric_data first if the column may be dirty.
        high_value_sales = processor.filter_data(
            lambda row: float(row.get('amount', 0)) > 1000
        )
        print(f"High-value sales: {len(high_value_sales)}")
python code snippet end
Module Best Practices
Organizing Large Projects
python code snippet start
# Project structure for a web application:
#
# myapp/
#     __init__.py
#     main.py              # Entry point
#     config.py            # Configuration
#     models/              # Data models
#         __init__.py
#         user.py
#         product.py
#     views/               # Web views
#         __init__.py
#         auth.py
#         api.py
#     utils/               # Utilities
#         __init__.py
#         database.py
#         helpers.py
#     tests/               # Tests
#         __init__.py
#         test_models.py
#         test_views.py
# myapp/__init__.py
"""
MyApp - A web application framework
"""
# Package metadata, readable as myapp.__version__ / myapp.__author__
__version__ = "2.0.0"
__author__ = "Your Name"
# Import key components for easy access, so callers can write
# `from myapp import User, Product`
# NOTE(review): assumes config.py exposes a `settings` object - confirm
from .config import settings
from .models import User, Product
# Package-level configuration
def configure(debug=False, database_url=None):
    """Configure the application.

    Args:
        debug: Value stored in ``settings.DEBUG``; always applied.
        database_url: When truthy, stored in ``settings.DATABASE_URL``;
            otherwise the existing value is left alone.
    """
    if database_url:
        settings.DATABASE_URL = database_url
    settings.DEBUG = debug
# myapp/models/__init__.py
# Re-export the model classes and their managers from the submodules so that
# callers can import them from `myapp.models` directly
from .user import User, UserManager
from .product import Product, ProductManager
# Names exported by "from myapp.models import *"
__all__ = ['User', 'UserManager', 'Product', 'ProductManager']
python code snippet end
Documentation and Type Hints
python code snippet start
# well_documented_module.py
"""
A well-documented module demonstrating best practices.
This module provides utilities for string processing and validation.
It includes functions for cleaning text, validating formats, and
performing common string transformations.
Example:
>>> from well_documented_module import clean_text, validate_email
>>> clean_text(" Hello World! ")
'Hello World!'
>>> validate_email("user@example.com")
True
"""
from typing import Optional, List, Dict, Union
import re
__version__ = "1.0.0"
__author__ = "Your Name <your.email@example.com>"
# Module-level constants
# EMAIL_PATTERN: a local part of letters, digits and ._%+- characters, an "@",
# a domain of letters, digits, dots and hyphens, then a dot and a top-level
# domain of at least two letters. Anchored at both ends. Compiled once here
# so every call reuses the same pattern object.
EMAIL_PATTERN = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
# PHONE_PATTERN: optional "+", optional "1", then digit groups of 3-3-4 with
# optional "-", "." or whitespace separators and optional parentheses around
# the first group.
# NOTE(review): no function visible in this module references PHONE_PATTERN.
PHONE_PATTERN = re.compile(r'^\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}$')
def clean_text(text: str, remove_extra_spaces: bool = True) -> str:
    """
    Clean and normalize text string.

    Args:
        text: The input text to clean
        remove_extra_spaces: Whether to collapse each run of whitespace
            (spaces, tabs, newlines) into a single space

    Returns:
        Cleaned text string

    Raises:
        TypeError: If text is not a string

    Example:
        >>> clean_text("  Hello   World!  ")
        'Hello World!'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Remove leading/trailing whitespace
    cleaned = text.strip()

    # Remove extra spaces if requested
    if remove_extra_spaces:
        cleaned = re.sub(r'\s+', ' ', cleaned)

    return cleaned
def validate_email(email: str) -> bool:
    """
    Validate email address format.

    Surrounding whitespace is ignored. Only the format is checked, against
    EMAIL_PATTERN; nothing verifies that the address exists.

    Args:
        email: Email address to validate

    Returns:
        True if email format is valid, False otherwise (including when
        email is not a string)

    Example:
        >>> validate_email("user@example.com")
        True
        >>> validate_email("invalid-email")
        False
    """
    if not isinstance(email, str):
        return False
    return bool(EMAIL_PATTERN.match(email.strip()))
def parse_name(full_name: str) -> Dict[str, Optional[str]]:
    """
    Parse a full name into components.

    The name is split on whitespace. The first word is the first name, the
    last word is the last name, and everything in between is joined into
    the middle name.

    Args:
        full_name: Full name string to parse

    Returns:
        Dictionary with 'first', 'middle', and 'last' name components;
        components that are absent are None

    Raises:
        TypeError: If full_name is not a string

    Example:
        >>> parse_name("John Michael Smith")
        {'first': 'John', 'middle': 'Michael', 'last': 'Smith'}
    """
    if not isinstance(full_name, str):
        raise TypeError("Full name must be a string")

    parts = clean_text(full_name).split()

    if len(parts) == 0:
        return {'first': None, 'middle': None, 'last': None}
    elif len(parts) == 1:
        return {'first': parts[0], 'middle': None, 'last': None}
    elif len(parts) == 2:
        return {'first': parts[0], 'middle': None, 'last': parts[1]}
    else:
        return {
            'first': parts[0],
            'middle': ' '.join(parts[1:-1]),
            'last': parts[-1]
        }
class TextProcessor:
    """
    A class for advanced text processing operations.

    Attributes:
        case_sensitive: Whether operations should be case sensitive

    Example:
        >>> processor = TextProcessor()
        >>> processor.word_count("Hello world hello")
        {'hello': 2, 'world': 1}
    """

    def __init__(self, case_sensitive: bool = False):
        """
        Initialize the text processor.

        Args:
            case_sensitive: Whether to treat text as case sensitive
        """
        self.case_sensitive = case_sensitive

    def word_count(self, text: str) -> Dict[str, int]:
        """
        Count occurrences of each word in text.

        Words are split on whitespace and stripped of non-word characters.
        Unless the processor is case sensitive, text is lower-cased first.

        Args:
            text: Input text to analyze

        Returns:
            Dictionary mapping words to their counts

        Raises:
            TypeError: If text is not a string
        """
        if not isinstance(text, str):
            raise TypeError("Input must be a string")

        # Clean and split text
        cleaned = clean_text(text)
        if not self.case_sensitive:
            cleaned = cleaned.lower()
        words = cleaned.split()

        # Count words
        word_counts = {}
        for word in words:
            # Remove punctuation
            word = re.sub(r'[^\w]', '', word)
            if word:  # Skip empty strings
                word_counts[word] = word_counts.get(word, 0) + 1

        return word_counts

    def extract_numbers(self, text: str) -> List[float]:
        """
        Extract all numbers from text.

        A number is an optional minus sign, one or more digits, and an
        optional fractional part. A value with no digit before the point,
        such as ".5", is picked up as 5.0.

        Args:
            text: Input text containing numbers

        Returns:
            List of numbers found in the text, in order of appearance

        Raises:
            TypeError: If text is not a string
        """
        if not isinstance(text, str):
            raise TypeError("Input must be a string")

        # Find all number patterns
        number_pattern = r'-?\d+\.?\d*'
        matches = re.findall(number_pattern, text)

        # Convert to float
        numbers = []
        for match in matches:
            try:
                numbers.append(float(match))
            except ValueError:
                continue  # Skip invalid numbers

        return numbers
# Module-level convenience functions
def quick_clean(text: str) -> str:
    """Quick text cleaning with default settings (whitespace runs collapsed)."""
    return clean_text(text, remove_extra_spaces=True)
def is_valid_email(email: str) -> bool:
    """Alias for validate_email for backward compatibility."""
    return validate_email(email)
# What gets imported with "from module import *"
# Public API of the module: only these names are bound by a star import.
# Anything left out (such as the regex constants) must be imported by name.
__all__ = [
'clean_text',
'validate_email',
'parse_name',
'TextProcessor',
'quick_clean',
'is_valid_email'
]
if __name__ == "__main__":
    # Demo when run as script; skipped when the module is imported
    print(f"Text Processing Module v{__version__}")
    print("Demo:")

    sample_text = " Hello World! This has 123 numbers and email@example.com "
    print(f"Original: '{sample_text}'")
    print(f"Cleaned: '{clean_text(sample_text)}'")

    processor = TextProcessor()
    word_counts = processor.word_count(sample_text)
    print(f"Word counts: {word_counts}")

    numbers = processor.extract_numbers(sample_text)
    print(f"Numbers found: {numbers}")
python code snippet end
Python’s module system is the foundation for organizing code, creating reusable components, and building maintainable applications that scale from simple scripts to complex frameworks.
Effective module design pairs well with type hints for better documentation and with pathlib for modern file handling when working with configuration files and data. Testing modules requires a unit testing framework. To explore the broader ecosystem, see the standard library tour and the next steps in Python development.
Reference: Python Tutorial - Modules