Migration squashing combines multiple migrations into a single, optimized migration file. This process helps maintain a clean migration history, improves performance, and reduces complexity in long-running projects. Understanding when and how to squash migrations is essential for maintaining a healthy Django project.
# Before squashing: Multiple migrations
# blog/migrations/0001_initial.py
class Migration(migrations.Migration):
    """Initial blog migration: creates the ``Post`` model."""

    # First migration of the app, so it depends on nothing.
    initial = True
    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Post',
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('title', models.CharField(max_length=200)),
                ('content', models.TextField()),
            ],
        ),
    ]
# blog/migrations/0002_add_author.py
class Migration(migrations.Migration):
    """Second blog migration: adds the ``author`` field to ``Post``."""

    dependencies = [('blog', '0001_initial')]

    operations = [
        migrations.AddField(
            model_name='post',
            name='author',
            # Existing rows receive the 'Anonymous' default.
            field=models.CharField(max_length=100, default='Anonymous'),
        ),
    ]
# blog/migrations/0003_add_created_at.py
class Migration(migrations.Migration):
    """Third blog migration: adds the ``created_at`` timestamp to ``Post``."""

    dependencies = [('blog', '0002_add_author')]

    operations = [
        migrations.AddField(
            model_name='post',
            name='created_at',
            field=models.DateTimeField(auto_now_add=True, default=timezone.now),
            # The default only back-fills existing rows; it is not kept on
            # the field afterwards.
            preserve_default=False,
        ),
    ]
# blog/migrations/0004_add_status.py
class Migration(migrations.Migration):
    """Fourth blog migration: adds the ``status`` field to ``Post``."""

    dependencies = [('blog', '0003_add_created_at')]

    operations = [
        migrations.AddField(
            model_name='post',
            name='status',
            # Existing rows receive the 'draft' default.
            field=models.CharField(max_length=20, default='draft'),
        ),
    ]
# After squashing: Single optimized migration
# Command: python manage.py squashmigrations blog 0001 0004
# Generated: blog/migrations/0001_initial_squashed_0004_add_status.py
class Migration(migrations.Migration):
    """Squashed migration combining 0001-0004"""

    initial = True
    dependencies = []

    # Migrations that this single file stands in for.
    replaces = [
        ('blog', '0001_initial'),
        ('blog', '0002_add_author'),
        ('blog', '0003_add_created_at'),
        ('blog', '0004_add_status'),
    ]

    operations = [
        # The CreateModel and the three AddField operations collapse into
        # one CreateModel that declares every field up front.
        migrations.CreateModel(
            name='Post',
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('title', models.CharField(max_length=200)),
                ('content', models.TextField()),
                ('author', models.CharField(max_length=100, default='Anonymous')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('status', models.CharField(max_length=20, default='draft')),
            ],
        ),
    ]
# Basic squashing command
# python manage.py squashmigrations app_name start_migration end_migration
# Examples:
# python manage.py squashmigrations blog 0001 0004
# python manage.py squashmigrations blog 0005 0010 --squashed-name combined_features
class MigrationSquashingAnalyzer:
    """Analyze when migrations should be squashed.

    Works on the migrations of a single app, as found in the graph of a
    Django ``MigrationLoader``.
    """

    def __init__(self, app_label):
        """Store the app label and load the migration graph.

        Args:
            app_label: Label of the app whose migrations are analyzed.
        """
        self.app_label = app_label
        # Function-scope imports: Django is only touched when an analyzer
        # is actually constructed.
        from django.db import connection
        from django.db.migrations.loader import MigrationLoader
        self.loader = MigrationLoader(connection)

    def _app_migration_names(self):
        """Return the sorted names of this app's migrations in the graph."""
        return sorted(
            key[1] for key in self.loader.graph.nodes
            if key[0] == self.app_label
        )

    def analyze_squashing_candidates(self):
        """Identify migrations that are good candidates for squashing.

        Returns:
            dict with keys ``total_migrations`` (int),
            ``squashing_opportunities`` (list of dicts describing a start/end
            range) and ``recommendations`` (list of str).
        """
        analysis = {
            'total_migrations': 0,
            'squashing_opportunities': [],
            'recommendations': []
        }

        app_migrations = self._app_migration_names()
        analysis['total_migrations'] = len(app_migrations)

        # Many migrations in one app is itself a hint to squash.
        if len(app_migrations) > 20:
            analysis['recommendations'].append(
                f"App has {len(app_migrations)} migrations - consider squashing older migrations"
            )

        # Long runs of related migrations are squashing opportunities.
        for sequence in self._find_migration_sequences(app_migrations):
            if len(sequence) >= 5:
                analysis['squashing_opportunities'].append({
                    'start_migration': sequence[0],
                    'end_migration': sequence[-1],
                    'migration_count': len(sequence),
                    'reason': 'Long sequence of related migrations'
                })

        # Suggest combining the ten oldest migrations once the app has more
        # than ten of them.
        if len(app_migrations) > 10:
            old_migrations = app_migrations[:10]
            analysis['squashing_opportunities'].append({
                'start_migration': old_migrations[0],
                'end_migration': old_migrations[-1],
                'migration_count': len(old_migrations),
                'reason': 'Old migrations that can be safely combined'
            })

        return analysis

    def _find_migration_sequences(self, migrations):
        """Find sequences of migrations that modify the same models.

        Args:
            migrations: Migration names of this app, in the order to scan.

        Returns:
            List of sequences (each a list of names); only sequences with
            more than one migration are reported.
        """
        sequences = []
        current_sequence = []

        for migration_name in migrations:
            migration = self.loader.get_migration(self.app_label, migration_name)
            if self._is_related_to_sequence(migration, current_sequence):
                current_sequence.append(migration_name)
            else:
                # The run is broken: keep it if it was a real sequence and
                # start a new one at the current migration.
                if len(current_sequence) > 1:
                    sequences.append(current_sequence)
                current_sequence = [migration_name]

        # Add final sequence
        if len(current_sequence) > 1:
            sequences.append(current_sequence)

        return sequences

    def _is_related_to_sequence(self, migration, current_sequence):
        """Check if migration is related to current sequence.

        This is a simplified heuristic: an empty sequence accepts any
        migration; otherwise the migration counts as related as soon as at
        least one of its operations names a model. The contents of
        ``current_sequence`` are not compared against those models.
        """
        if not current_sequence:
            return True

        affected_models = set()
        for operation in migration.operations:
            if hasattr(operation, 'model_name'):
                affected_models.add(operation.model_name.lower())
            elif hasattr(operation, 'name'):
                affected_models.add(operation.name.lower())

        return len(affected_models) > 0

    def check_squashing_safety(self, start_migration, end_migration):
        """Check if it's safe to squash a range of migrations.

        Args:
            start_migration: Name of the first migration in the range.
            end_migration: Name of the last migration in the range.

        Returns:
            dict with ``safe_to_squash`` (bool), ``warnings`` (list of str)
            and ``blocking_issues`` (list of str). Unknown names and a start
            that sorts after the end are blocking issues; everything else is
            reported as a warning only.
        """
        safety_check = {
            'safe_to_squash': True,
            'warnings': [],
            'blocking_issues': []
        }

        app_migrations = self._app_migration_names()

        try:
            start_idx = app_migrations.index(start_migration)
            end_idx = app_migrations.index(end_migration)
        except ValueError:
            safety_check['safe_to_squash'] = False
            safety_check['blocking_issues'].append("Invalid migration range")
            return safety_check

        # A reversed range would slice to an empty list and be reported as
        # safe without inspecting a single migration.
        if start_idx > end_idx:
            safety_check['safe_to_squash'] = False
            safety_check['blocking_issues'].append(
                f"Invalid migration range: {start_migration} comes after {end_migration}"
            )
            return safety_check

        migrations_to_squash = app_migrations[start_idx:end_idx + 1]

        # Check for operations that might not squash well
        for migration_name in migrations_to_squash:
            migration = self.loader.get_migration(self.app_label, migration_name)
            for operation in migration.operations:
                op_name = operation.__class__.__name__
                if op_name == 'RunPython':
                    safety_check['warnings'].append(
                        f"Migration {migration_name} contains RunPython operation"
                    )
                elif op_name == 'RunSQL':
                    safety_check['warnings'].append(
                        f"Migration {migration_name} contains RunSQL operation"
                    )
                elif op_name in ['RemoveField', 'DeleteModel']:
                    safety_check['warnings'].append(
                        f"Migration {migration_name} contains destructive operation: {op_name}"
                    )

        # Check for external dependencies (kept as a second pass so that
        # operation warnings are listed before dependency warnings).
        for migration_name in migrations_to_squash:
            migration = self.loader.get_migration(self.app_label, migration_name)
            for dep_app, dep_migration in migration.dependencies:
                if dep_app != self.app_label:
                    safety_check['warnings'].append(
                        f"Migration {migration_name} depends on external app: {dep_app}.{dep_migration}"
                    )

        return safety_check
class ManualSquashingProcess:
    """Manual process for squashing migrations.

    Every method is a static helper returning either a checklist (list of
    str) or a migration template (str); none of them touches the database.
    """

    @staticmethod
    def prepare_for_squashing():
        """Prepare project for migration squashing.

        Returns:
            Ordered list of preparation steps.
        """
        preparation_steps = [
            "1. Ensure all migrations are applied in all environments",
            "2. Create backup of migration files",
            "3. Verify no pending migrations exist",
            "4. Check that all team members have latest migrations",
            "5. Ensure CI/CD pipeline is not running migrations",
            "6. Document current migration state"
        ]
        return preparation_steps

    @staticmethod
    def create_squashed_migration():
        """Create a squashed migration manually.

        Returns:
            Source text of an example squashed migration module. The text is
            indented so that it is syntactically valid Python.
        """
        # The template body starts at column 0 on purpose: it is the literal
        # content of the generated file, not part of this method's code.
        squashed_migration_template = '''
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """
    Squashed migration combining migrations 0001-0005

    This migration replaces:
    - 0001_initial: Created Post model
    - 0002_add_author: Added author field
    - 0003_add_created_at: Added created_at field
    - 0004_add_status: Added status field
    - 0005_add_category: Added category relationship

    Safe to apply on fresh databases.
    """

    initial = True

    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
    ]

    replaces = [
        ('blog', '0001_initial'),
        ('blog', '0002_add_author'),
        ('blog', '0003_add_created_at'),
        ('blog', '0004_add_status'),
        ('blog', '0005_add_category'),
    ]

    operations = [
        migrations.CreateModel(
            name='Category',
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('name', models.CharField(max_length=100)),
                ('description', models.TextField(blank=True)),
            ],
        ),
        migrations.CreateModel(
            name='Post',
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('title', models.CharField(max_length=200)),
                ('content', models.TextField()),
                ('author', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    to='auth.user'
                )),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('status', models.CharField(
                    choices=[('draft', 'Draft'), ('published', 'Published')],
                    default='draft',
                    max_length=20
                )),
                ('category', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    to='blog.category'
                )),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
    ]
'''
        return squashed_migration_template

    @staticmethod
    def validate_squashed_migration():
        """Validate that squashed migration works correctly.

        Returns:
            Ordered list of validation steps.
        """
        validation_steps = [
            "1. Test migration on fresh database",
            "2. Compare final schema with original migrations",
            "3. Verify all indexes and constraints are created",
            "4. Test migration rollback if applicable",
            "5. Check that data migrations are preserved",
            "6. Validate foreign key relationships",
            "7. Test with different database backends if used"
        ]
        return validation_steps

    @staticmethod
    def cleanup_after_squashing():
        """Clean up after successful squashing.

        Returns:
            Ordered list of cleanup steps.
        """
        cleanup_steps = [
            "1. Remove original migration files (keep backup)",
            "2. Update migration dependencies in other apps if needed",
            "3. Test full migration process from scratch",
            "4. Update documentation",
            "5. Notify team of migration changes",
            "6. Update deployment scripts if they reference specific migrations"
        ]
        return cleanup_steps
# Advanced squashing techniques
class AdvancedSquashingTechniques:
    """Advanced techniques for migration squashing.

    Each static method is a factory: it returns a function illustrating one
    technique instead of performing any work itself.
    """

    @staticmethod
    def squash_with_data_preservation():
        """Squash migrations while preserving data migrations.

        Returns:
            A zero-argument function that builds the operation list of a
            squashed migration whose ``RunPython`` step combines the data
            migrations of the squashed files.
        """
        def create_data_preserving_squash():
            """Create squashed migration that preserves data operations."""
            # When squashing migrations with RunPython operations,
            # you need to preserve the data migration logic

            def combined_data_migration(apps, schema_editor):
                """Combined data migration from multiple migrations."""
                Post = apps.get_model('blog', 'Post')

                # Data migration from 0002: Set default authors
                Post.objects.filter(author__isnull=True).update(
                    author_id=1  # Default admin user
                )

                # Data migration from 0004: Set default status
                Post.objects.filter(status__isnull=True).update(
                    status='draft'
                )

                # Data migration from 0005: Assign default category
                Category = apps.get_model('blog', 'Category')
                default_category, created = Category.objects.get_or_create(
                    name='General',
                    defaults={'description': 'General posts'}
                )
                Post.objects.filter(category__isnull=True).update(
                    category=default_category
                )

            def reverse_combined_data_migration(apps, schema_editor):
                """Reverse the combined data migration."""
                # Usually just clear the fields back to null
                Post = apps.get_model('blog', 'Post')
                Post.objects.update(
                    author=None,
                    status=None,
                    category=None
                )

            # The squashed migration would include:
            squashed_operations = [
                # Schema operations (combined); `[...]` is a placeholder for
                # the real field lists and must be filled in before use.
                migrations.CreateModel(name='Category', fields=[...]),
                migrations.CreateModel(name='Post', fields=[...]),
                # Combined data migration
                migrations.RunPython(
                    code=combined_data_migration,
                    reverse_code=reverse_combined_data_migration,
                ),
            ]
            return squashed_operations

        return create_data_preserving_squash

    @staticmethod
    def conditional_squashing():
        """Squash migrations conditionally based on environment.

        Returns:
            A ``RunPython``-style callable ``(apps, schema_editor)`` that only
            back-fills data when posts already exist.
        """
        def conditional_squash_migration(apps, schema_editor):
            """Migration that behaves differently based on existing data."""
            Post = apps.get_model('blog', 'Post')

            # Check if this is a fresh installation or existing data
            existing_posts = Post.objects.exists()

            if not existing_posts:
                # Fresh installation - create everything at once
                print("Fresh installation detected - creating optimized schema")
                # All schema operations are handled by the CreateModel operations
                # No additional data migrations needed
            else:
                # Existing installation - need to handle data carefully
                print("Existing data detected - running compatibility migrations")
                # Run data migrations that would have been in original migrations
                # This ensures existing installations work correctly
                # Example: Populate fields that were added in separate migrations
                Post.objects.filter(status__isnull=True).update(status='draft')
                Post.objects.filter(author__isnull=True).update(author_id=1)

        return conditional_squash_migration

    @staticmethod
    def optimize_squashed_operations():
        """Optimize operations in squashed migrations.

        Returns:
            A function ``optimize_operations(original_operations)`` that
            folds ``AddField`` operations into the ``CreateModel`` of the
            same model and returns the new operation list.
        """
        def optimize_operations(original_operations):
            """Optimize a list of migration operations.

            Operations are grouped per model (case-insensitively, because
            ``CreateModel.name`` is usually capitalised while
            ``AddField.model_name`` is lower-case). Each group is emitted at
            the position of its first operation, and operations that name no
            model (like RunPython) keep their place relative to those
            groups instead of being moved to the front.
            """
            # Each slot is (group, operation): exactly one of the two is set.
            slots = []
            groups = {}
            for operation in original_operations:
                model_name = (
                    getattr(operation, 'model_name', None)
                    or getattr(operation, 'name', None)
                )
                if not model_name:
                    slots.append((None, operation))
                    continue
                key = model_name.lower()
                if key not in groups:
                    groups[key] = []
                    slots.append((groups[key], None))
                groups[key].append(operation)

            optimized = []
            for group, operation in slots:
                if group is None:
                    optimized.append(operation)
                else:
                    optimized.extend(optimize_model_operations(group))
            return optimized

        def optimize_model_operations(operations):
            """Optimize operations for a single model.

            Pattern handled: one CreateModel plus AddField operations, which
            become a single CreateModel declaring all fields, followed by the
            remaining operations. Any other group (no CreateModel, or more
            than one) is returned unchanged so that no operation is lost.
            """
            create_model_ops = []
            add_field_ops = []
            other_ops = []
            for op in operations:
                op_name = op.__class__.__name__
                if op_name == 'CreateModel':
                    create_model_ops.append(op)
                elif op_name == 'AddField':
                    add_field_ops.append(op)
                else:
                    other_ops.append(op)

            if len(create_model_ops) != 1:
                return list(operations)

            create_model_op = create_model_ops[0]
            if not add_field_ops:
                return [create_model_op] + other_ops

            # Copy the field list so the original operation is not mutated.
            combined_fields = list(create_model_op.fields)
            combined_fields.extend(
                (add_field_op.name, add_field_op.field)
                for add_field_op in add_field_ops
            )

            # Rebuild through the operation's own class, so this helper does
            # not depend on a module-level `migrations` name.
            optimized_create = type(create_model_op)(
                name=create_model_op.name,
                fields=combined_fields,
                options=create_model_op.options,
                bases=create_model_op.bases,
                managers=create_model_op.managers,
            )
            return [optimized_create] + other_ops

        return optimize_operations
class ProductionSquashingStrategy:
    """Strategies for safely squashing migrations in production.

    Every method is a static helper that returns plain data (checklists,
    templates) or a function producing them; nothing here runs a migration.
    """

    @staticmethod
    def create_production_squashing_plan():
        """Create plan for squashing migrations in production environment.

        Returns:
            dict mapping the phases ``pre_squashing``, ``squashing_process``,
            ``deployment`` and ``post_deployment`` to ordered step lists.
        """
        plan = {
            'pre_squashing': [
                "1. Audit all environments to ensure migrations are synchronized",
                "2. Create comprehensive backup of all databases",
                "3. Document current migration state across all environments",
                "4. Freeze new migration creation during squashing process",
                "5. Notify all team members of squashing timeline",
                "6. Prepare rollback procedures"
            ],
            'squashing_process': [
                "1. Create squashed migration in development",
                "2. Test squashed migration on copy of production data",
                "3. Validate that squashed migration produces identical schema",
                "4. Test rollback procedures with squashed migration",
                "5. Deploy squashed migration to staging environment",
                "6. Perform full application testing in staging"
            ],
            'deployment': [
                "1. Schedule maintenance window for production deployment",
                "2. Apply squashed migration to production",
                "3. Verify schema integrity after deployment",
                "4. Run application smoke tests",
                "5. Monitor application performance and error rates",
                "6. Clean up old migration files after successful deployment"
            ],
            'post_deployment': [
                "1. Update deployment documentation",
                "2. Notify team of successful squashing",
                "3. Resume normal migration development process",
                "4. Archive old migration backups after retention period",
                "5. Update CI/CD pipelines if needed"
            ]
        }
        return plan

    @staticmethod
    def create_zero_downtime_squashing():
        """Create strategy for zero-downtime migration squashing.

        Returns:
            A zero-argument function returning ``(phase1_migration,
            phase2_steps)``: the source text of the phase-1 migration class
            and the ordered list of phase-2 steps.
        """
        def zero_downtime_squash():
            """Implement zero-downtime squashing strategy."""
            # Phase 1: Deploy squashed migration alongside old migrations
            # NOTE(review): the template depends on a migration that it also
            # lists in `replaces`, and its empty `operations` would create no
            # schema on a fresh database -- confirm this is intended before
            # using it as more than an illustration.
            phase1_migration = '''
class Migration(migrations.Migration):
    """
    Phase 1: Squashed migration that coexists with old migrations
    This migration is marked as applied but doesn't actually run
    """

    dependencies = [
        ('blog', '0010_latest_individual_migration'),
    ]

    # Mark as already applied since schema already exists
    operations = []

    # This migration replaces the old ones but doesn't change schema
    replaces = [
        ('blog', '0001_initial'),
        ('blog', '0002_add_author'),
        # ... other migrations
        ('blog', '0010_latest_individual_migration'),
    ]
'''
            # Phase 2: Remove old migration files after squashed migration is applied
            phase2_steps = [
                "1. Verify squashed migration is applied in all environments",
                "2. Remove old migration files from codebase",
                "3. Deploy code without old migration files",
                "4. Verify application continues to work normally"
            ]
            return phase1_migration, phase2_steps

        return zero_downtime_squash

    @staticmethod
    def handle_squashing_conflicts():
        """Handle conflicts that arise during squashing.

        Returns:
            dict mapping a conflict type to a dict with a ``description``
            (str) and a ``solution`` (ordered list of steps).
        """
        conflict_resolution = {
            'dependency_conflicts': {
                'description': 'External apps depend on migrations being squashed',
                'solution': [
                    "1. Identify all external dependencies",
                    "2. Update external app migrations to depend on squashed migration",
                    "3. Coordinate deployment across all affected apps",
                    "4. Test integration between apps after squashing"
                ]
            },
            'data_migration_conflicts': {
                'description': 'Data migrations have complex interdependencies',
                'solution': [
                    "1. Analyze data migration dependencies carefully",
                    "2. Combine data migrations in correct order",
                    "3. Test combined data migration on production-like data",
                    "4. Consider keeping complex data migrations separate"
                ]
            },
            'schema_conflicts': {
                'description': 'Schema changes conflict when combined',
                'solution': [
                    "1. Identify conflicting schema operations",
                    "2. Resolve conflicts by choosing final desired state",
                    "3. Test that final schema matches expectations",
                    "4. Verify all constraints and indexes are correct"
                ]
            },
            'rollback_conflicts': {
                'description': 'Squashed migration cannot be safely rolled back',
                'solution': [
                    "1. Document that squashed migration is not reversible",
                    "2. Create separate rollback procedures if needed",
                    "3. Ensure backup and restore procedures are available",
                    "4. Test rollback procedures in staging environment"
                ]
            }
        }
        return conflict_resolution
# Automated squashing tools
class AutomatedSquashingTools:
    """Tools for automating migration squashing."""

    @staticmethod
    def create_squashing_script():
        """Create script to automate squashing process.

        Returns:
            Text of a bash script that takes ``<app_name> <start_migration>
            <end_migration>``, backs up the migrations directory, runs
            ``squashmigrations``, migrates the ``test`` database alias and
            prints the SQL of the first squashed migration file.
        """
        script_content = '''#!/bin/bash
# Automated migration squashing script
set -e
APP_NAME=$1
START_MIGRATION=$2
END_MIGRATION=$3
if [ -z "$APP_NAME" ] || [ -z "$START_MIGRATION" ] || [ -z "$END_MIGRATION" ]; then
echo "Usage: $0 <app_name> <start_migration> <end_migration>"
exit 1
fi
echo "Starting migration squashing for $APP_NAME: $START_MIGRATION to $END_MIGRATION"
# Backup existing migrations
echo "Creating backup of existing migrations..."
cp -r $APP_NAME/migrations $APP_NAME/migrations_backup_$(date +%Y%m%d_%H%M%S)
# Check current migration state
echo "Checking current migration state..."
python manage.py showmigrations $APP_NAME
# Create squashed migration
echo "Creating squashed migration..."
python manage.py squashmigrations $APP_NAME $START_MIGRATION $END_MIGRATION
# Test squashed migration
echo "Testing squashed migration on test database..."
python manage.py migrate --database=test
# Validate schema
echo "Validating schema..."
python manage.py sqlmigrate $APP_NAME $(ls $APP_NAME/migrations/*squashed*.py | head -1 | sed 's/.*\\///;s/\\.py//')
echo "Squashing completed successfully!"
echo "Please review the squashed migration before deploying to production."
'''
        return script_content

    @staticmethod
    def create_squashing_validator():
        """Create validator for squashed migrations.

        Returns:
            The function ``validate_squashed_migration(app_label,
            squashed_migration_name, loader=None)``.
        """
        def validate_squashed_migration(app_label, squashed_migration_name, loader=None):
            """Validate that squashed migration is correct.

            Args:
                app_label: App that owns the squashed migration.
                squashed_migration_name: Name of the squashed migration.
                loader: Optional, already-built migration loader to reuse.
                    When omitted, a ``MigrationLoader`` for the default
                    connection is created.

            Returns:
                dict with ``valid`` (bool), ``errors`` and ``warnings``
                (lists of str). ``valid`` is False whenever at least one
                error was recorded.
            """
            if loader is None:
                # Function-scope imports: Django is only needed when no
                # loader is supplied by the caller.
                from django.db import connection
                from django.db.migrations.loader import MigrationLoader
                loader = MigrationLoader(connection)

            validation_results = {
                'valid': True,
                'errors': [],
                'warnings': []
            }

            try:
                # Get squashed migration
                squashed_migration = loader.get_migration(app_label, squashed_migration_name)

                graph_nodes = loader.graph.nodes
                # Replaced migrations may have been removed from the graph by
                # the loader, so the files found on disk are consulted too.
                on_disk = getattr(loader, 'disk_migrations', None) or {}

                # Check replaces attribute
                replaces = getattr(squashed_migration, 'replaces', None) or []
                if not replaces:
                    validation_results['errors'].append(
                        "Squashed migration missing 'replaces' attribute"
                    )

                # Validate that replaced migrations exist
                for replaced_app, replaced_name in replaces:
                    key = (replaced_app, replaced_name)
                    if key not in on_disk and key not in graph_nodes:
                        validation_results['warnings'].append(
                            f"Replaced migration {replaced_app}.{replaced_name} not found"
                        )

                # Check dependencies
                for dep_app, dep_name in squashed_migration.dependencies:
                    key = (dep_app, dep_name)
                    if key not in graph_nodes and key not in on_disk:
                        validation_results['errors'].append(
                            f"Dependency {dep_app}.{dep_name} not found"
                        )

                # Validate operations
                for operation in squashed_migration.operations:
                    if hasattr(operation, 'reversible') and not operation.reversible:
                        validation_results['warnings'].append(
                            f"Operation {operation.__class__.__name__} is not reversible"
                        )
            except Exception as e:
                # Validator boundary: any failure is reported, not raised.
                validation_results['errors'].append(f"Error loading squashed migration: {e}")

            # Any recorded error makes the migration invalid, not only an
            # exception while loading it.
            validation_results['valid'] = not validation_results['errors']
            return validation_results

        return validate_squashed_migration
Migration squashing is a powerful technique for maintaining clean migration histories and improving performance. Proper planning, testing, and deployment strategies ensure that squashing enhances rather than complicates your Django project's migration management.
Data Migrations
Data migrations allow you to transform, populate, or clean up data during schema changes. Unlike schema migrations that modify database structure, data migrations work with the actual data in your database. Understanding how to write effective data migrations is crucial for maintaining data integrity during application evolution.
Serializing Values
Django migrations need to serialize Python values into migration files so they can be recreated when migrations run. Understanding how Django serializes values and how to handle custom serialization is crucial for creating robust migrations with complex data types and custom objects.