Migrations

Squashing Migrations

Migration squashing combines multiple migrations into a single, optimized migration file. This process helps maintain a clean migration history, improves performance, and reduces complexity in long-running projects. Understanding when and how to squash migrations is essential for maintaining a healthy Django project.

Squashing Migrations

Migration squashing combines multiple migrations into a single, optimized migration file. This process helps maintain a clean migration history, improves performance, and reduces complexity in long-running projects. Understanding when and how to squash migrations is essential for maintaining a healthy Django project.

Understanding Migration Squashing

What is Migration Squashing?

# Before squashing: Multiple migrations
# blog/migrations/0001_initial.py
class Migration(migrations.Migration):
    """Initial blog migration: creates the Post model (id, title, content)."""

    # First migration of the app, so it depends on nothing.
    dependencies = []
    initial = True

    operations = [
        migrations.CreateModel(
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('title', models.CharField(max_length=200)),
                ('content', models.TextField()),
            ],
            name='Post',
        ),
    ]

# blog/migrations/0002_add_author.py
class Migration(migrations.Migration):
    """Second blog migration: adds the ``author`` text field to Post."""

    dependencies = [
        ('blog', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            field=models.CharField(max_length=100, default='Anonymous'),
            name='author',
            model_name='post',
        ),
    ]

# blog/migrations/0003_add_created_at.py
class Migration(migrations.Migration):
    """Third blog migration: adds the ``created_at`` timestamp to Post."""

    dependencies = [
        ('blog', '0002_add_author'),
    ]

    operations = [
        migrations.AddField(
            # preserve_default=False: the default below is passed only to this
            # AddField operation and is flagged as not to be preserved.
            preserve_default=False,
            field=models.DateTimeField(auto_now_add=True, default=timezone.now),
            name='created_at',
            model_name='post',
        ),
    ]

# blog/migrations/0004_add_status.py
class Migration(migrations.Migration):
    """Fourth blog migration: adds the ``status`` field to Post."""

    dependencies = [
        ('blog', '0003_add_created_at'),
    ]

    operations = [
        migrations.AddField(
            field=models.CharField(max_length=20, default='draft'),
            name='status',
            model_name='post',
        ),
    ]

# After squashing: Single optimized migration
# Command: python manage.py squashmigrations blog 0001 0004
# Generated: blog/migrations/0001_initial_squashed_0004_add_status.py

class Migration(migrations.Migration):
    """Single migration standing in for blog migrations 0001 through 0004."""

    # The migrations this squashed migration replaces, oldest first.
    replaces = [
        ('blog', '0001_initial'),
        ('blog', '0002_add_author'),
        ('blog', '0003_add_created_at'),
        ('blog', '0004_add_status'),
    ]

    dependencies = []
    initial = True

    operations = [
        # The three AddField steps are folded into one CreateModel, so the
        # table is created with every column in a single operation.
        migrations.CreateModel(
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('title', models.CharField(max_length=200)),
                ('content', models.TextField()),
                ('author', models.CharField(max_length=100, default='Anonymous')),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('status', models.CharField(max_length=20, default='draft')),
            ],
            name='Post',
        ),
    ]

# Basic squashing command
# python manage.py squashmigrations app_name start_migration end_migration

# Examples:
# python manage.py squashmigrations blog 0001 0004
# python manage.py squashmigrations blog 0005 0010 --squashed-name combined_features

When to Squash Migrations

class MigrationSquashingAnalyzer:
    """Analyze when an app's migrations should be squashed.

    The analyzer reads migration names, operations and dependencies from
    ``self.loader`` (a Django ``MigrationLoader``) and reports its findings
    as plain dictionaries.
    """

    # Thresholds used by analyze_squashing_candidates().
    MANY_MIGRATIONS_THRESHOLD = 20  # more migrations than this => general advice
    MIN_SEQUENCE_LENGTH = 5         # shortest related run reported as an opportunity
    OLD_MIGRATION_WINDOW = 10       # number of leading migrations proposed for squashing

    def __init__(self, app_label):
        """Store ``app_label`` and build a migration loader for the default connection.

        Args:
            app_label: Label of the Django app whose migrations are analyzed.
        """
        self.app_label = app_label
        # ``connection`` is imported here so the class does not rely on a
        # module-level import being present.
        from django.db import connection
        from django.db.migrations.loader import MigrationLoader
        self.loader = MigrationLoader(connection)

    def _app_migration_names(self):
        """Return the sorted names of this app's migrations in the loader graph.

        The sort is lexicographic, so it matches application order only when
        names carry zero-padded numeric prefixes (``0001_...``, ``0002_...``).
        """
        return sorted(
            name
            for app, name in self.loader.graph.nodes
            if app == self.app_label
        )

    def analyze_squashing_candidates(self):
        """Identify migrations that are good candidates for squashing.

        Returns:
            dict with keys ``total_migrations`` (int),
            ``squashing_opportunities`` (list of dicts with
            ``start_migration``, ``end_migration``, ``migration_count`` and
            ``reason``) and ``recommendations`` (list of str).
        """
        app_migrations = self._app_migration_names()
        total = len(app_migrations)

        analysis = {
            'total_migrations': total,
            'squashing_opportunities': [],
            'recommendations': []
        }

        if total > self.MANY_MIGRATIONS_THRESHOLD:
            analysis['recommendations'].append(
                f"App has {total} migrations - consider squashing older migrations"
            )

        # Runs of consecutive migrations that the heuristic considers related.
        for sequence in self._find_migration_sequences(app_migrations):
            if len(sequence) >= self.MIN_SEQUENCE_LENGTH:
                analysis['squashing_opportunities'].append({
                    'start_migration': sequence[0],
                    'end_migration': sequence[-1],
                    'migration_count': len(sequence),
                    'reason': 'Long sequence of related migrations'
                })

        # Once the app has more migrations than the window, propose squashing
        # the oldest ones.
        if total > self.OLD_MIGRATION_WINDOW:
            old_migrations = app_migrations[:self.OLD_MIGRATION_WINDOW]
            analysis['squashing_opportunities'].append({
                'start_migration': old_migrations[0],
                'end_migration': old_migrations[-1],
                'migration_count': len(old_migrations),
                'reason': 'Old migrations that can be safely combined'
            })

        return analysis

    def _find_migration_sequences(self, migrations):
        """Split ``migrations`` into runs of consecutive related migrations.

        Args:
            migrations: Ordered list of migration names for this app.

        Returns:
            List of runs (each a list of names); runs of length 1 are omitted.
        """
        sequences = []
        current_sequence = []

        for migration_name in migrations:
            migration = self.loader.get_migration(self.app_label, migration_name)

            if self._is_related_to_sequence(migration, current_sequence):
                current_sequence.append(migration_name)
                continue

            # The run is broken: keep it if it is long enough, then start over.
            if len(current_sequence) > 1:
                sequences.append(current_sequence)
            current_sequence = [migration_name]

        # Flush the run that was still open when the loop ended.
        if len(current_sequence) > 1:
            sequences.append(current_sequence)

        return sequences

    def _is_related_to_sequence(self, migration, current_sequence):
        """Check whether ``migration`` continues ``current_sequence``.

        Simplified heuristic: an empty sequence accepts any migration;
        otherwise the migration is accepted when at least one of its
        operations exposes a ``model_name`` or ``name`` attribute. The
        contents of ``current_sequence`` are not compared.
        """
        if not current_sequence:
            return True

        affected_models = set()
        for operation in migration.operations:
            if hasattr(operation, 'model_name'):
                affected_models.add(operation.model_name.lower())
            elif hasattr(operation, 'name'):
                affected_models.add(operation.name.lower())

        return len(affected_models) > 0

    def check_squashing_safety(self, start_migration, end_migration):
        """Check if it is safe to squash an inclusive range of migrations.

        Args:
            start_migration: Name of the first migration in the range.
            end_migration: Name of the last migration in the range.

        Returns:
            dict with ``safe_to_squash`` (bool), ``warnings`` (list of str)
            and ``blocking_issues`` (list of str). Unknown names and a range
            whose start sorts after its end are reported as blocking issues.
        """
        safety_check = {
            'safe_to_squash': True,
            'warnings': [],
            'blocking_issues': []
        }

        app_migrations = self._app_migration_names()

        try:
            start_idx = app_migrations.index(start_migration)
            end_idx = app_migrations.index(end_migration)
        except ValueError:
            safety_check['safe_to_squash'] = False
            safety_check['blocking_issues'].append("Invalid migration range")
            return safety_check

        # A reversed range would slice to an empty list and look "safe".
        if start_idx > end_idx:
            safety_check['safe_to_squash'] = False
            safety_check['blocking_issues'].append(
                "Invalid migration range: start migration comes after end migration"
            )
            return safety_check

        loaded = [
            (name, self.loader.get_migration(self.app_label, name))
            for name in app_migrations[start_idx:end_idx + 1]
        ]
        warnings = safety_check['warnings']

        # Operations that may need manual attention after squashing.
        for migration_name, migration in loaded:
            for operation in migration.operations:
                op_name = type(operation).__name__

                if op_name in ('RunPython', 'RunSQL'):
                    warnings.append(
                        f"Migration {migration_name} contains {op_name} operation"
                    )
                elif op_name in ('RemoveField', 'DeleteModel'):
                    warnings.append(
                        f"Migration {migration_name} contains destructive operation: {op_name}"
                    )

        # Dependencies on other apps (reported after all operation warnings).
        for migration_name, migration in loaded:
            for dep_app, dep_migration in migration.dependencies:
                if dep_app != self.app_label:
                    warnings.append(
                        f"Migration {migration_name} depends on external app: {dep_app}.{dep_migration}"
                    )

        return safety_check

Squashing Process and Best Practices

Manual Squashing Process

class ManualSquashingProcess:
    """Checklists and a file template for squashing migrations by hand."""

    @staticmethod
    def prepare_for_squashing():
        """Return the ordered checklist to complete before squashing starts."""

        return [
            "1. Ensure all migrations are applied in all environments",
            "2. Create backup of migration files",
            "3. Verify no pending migrations exist",
            "4. Check that all team members have latest migrations",
            "5. Ensure CI/CD pipeline is not running migrations",
            "6. Document current migration state"
        ]

    @staticmethod
    def create_squashed_migration():
        """Return example source text for a hand-written squashed migration.

        The text is only returned; nothing is written to disk.
        """

        # Sample migration file replacing blog migrations 0001-0005.
        template = '''
from django.db import migrations, models
import django.db.models.deletion

class Migration(migrations.Migration):
    """
    Squashed migration combining migrations 0001-0005
    
    This migration replaces:
    - 0001_initial: Created Post model
    - 0002_add_author: Added author field
    - 0003_add_created_at: Added created_at field
    - 0004_add_status: Added status field
    - 0005_add_category: Added category relationship
    
    Safe to apply on fresh databases.
    """
    
    initial = True
    
    dependencies = [
        ('auth', '0012_alter_user_first_name_max_length'),
    ]
    
    replaces = [
        ('blog', '0001_initial'),
        ('blog', '0002_add_author'),
        ('blog', '0003_add_created_at'),
        ('blog', '0004_add_status'),
        ('blog', '0005_add_category'),
    ]
    
    operations = [
        migrations.CreateModel(
            name='Category',
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('name', models.CharField(max_length=100)),
                ('description', models.TextField(blank=True)),
            ],
        ),
        migrations.CreateModel(
            name='Post',
            fields=[
                ('id', models.AutoField(primary_key=True)),
                ('title', models.CharField(max_length=200)),
                ('content', models.TextField()),
                ('author', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    to='auth.user'
                )),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('status', models.CharField(
                    choices=[('draft', 'Draft'), ('published', 'Published')],
                    default='draft',
                    max_length=20
                )),
                ('category', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    to='blog.category'
                )),
            ],
            options={
                'ordering': ['-created_at'],
            },
        ),
    ]
'''

        return template

    @staticmethod
    def validate_squashed_migration():
        """Return the ordered checklist for verifying a squashed migration."""

        return [
            "1. Test migration on fresh database",
            "2. Compare final schema with original migrations",
            "3. Verify all indexes and constraints are created",
            "4. Test migration rollback if applicable",
            "5. Check that data migrations are preserved",
            "6. Validate foreign key relationships",
            "7. Test with different database backends if used"
        ]

    @staticmethod
    def cleanup_after_squashing():
        """Return the ordered checklist to follow once squashing has succeeded."""

        return [
            "1. Remove original migration files (keep backup)",
            "2. Update migration dependencies in other apps if needed",
            "3. Test full migration process from scratch",
            "4. Update documentation",
            "5. Notify team of migration changes",
            "6. Update deployment scripts if they reference specific migrations"
        ]

# Advanced squashing techniques
class AdvancedSquashingTechniques:
    """Advanced techniques for migration squashing.

    Each static method is a factory that returns a callable; nothing touches
    the database until the returned callable is invoked.
    """

    @staticmethod
    def squash_with_data_preservation():
        """Return a builder for a squashed operation list that keeps data migrations.

        Returns:
            A zero-argument function returning a list of operations: the two
            combined ``CreateModel`` operations followed by one ``RunPython``
            that carries the merged data-migration logic.
        """

        def create_data_preserving_squash():
            """Create squashed operations that preserve the data operations."""

            # When squashing migrations with RunPython operations, the data
            # migration logic has to be carried over by hand.

            def combined_data_migration(apps, schema_editor):
                """Run the data steps of migrations 0002, 0004 and 0005 in order."""

                Post = apps.get_model('blog', 'Post')

                # Data migration from 0002: Set default authors
                # NOTE(review): author_id=1 assumes a user with pk 1 exists - confirm.
                Post.objects.filter(author__isnull=True).update(
                    author_id=1  # Default admin user
                )

                # Data migration from 0004: Set default status
                Post.objects.filter(status__isnull=True).update(
                    status='draft'
                )

                # Data migration from 0005: Assign default category
                Category = apps.get_model('blog', 'Category')
                default_category, created = Category.objects.get_or_create(
                    name='General',
                    defaults={'description': 'General posts'}
                )

                Post.objects.filter(category__isnull=True).update(
                    category=default_category
                )

            def reverse_combined_data_migration(apps, schema_editor):
                """Reverse the combined data migration by clearing the fields."""

                # NOTE(review): writing None presumably requires these columns
                # to be nullable - verify against the model definition.
                Post = apps.get_model('blog', 'Post')
                Post.objects.update(
                    author=None,
                    status=None,
                    category=None
                )

            # The squashed migration would include:
            squashed_operations = [
                # Schema operations (combined); ``[...]`` is a placeholder for
                # the real field lists.
                migrations.CreateModel(name='Category', fields=[...]),
                migrations.CreateModel(name='Post', fields=[...]),

                # Combined data migration
                migrations.RunPython(
                    code=combined_data_migration,
                    reverse_code=reverse_combined_data_migration,
                ),
            ]

            return squashed_operations

        return create_data_preserving_squash

    @staticmethod
    def conditional_squashing():
        """Return a ``(apps, schema_editor)`` callable that adapts to existing data.

        The returned function does nothing but print a message when the Post
        table is empty; otherwise it back-fills ``status`` and ``author``.
        """

        def conditional_squash_migration(apps, schema_editor):
            """Migration that behaves differently based on existing data."""

            Post = apps.get_model('blog', 'Post')

            # Fresh installation or existing data?
            existing_posts = Post.objects.exists()

            if not existing_posts:
                # Fresh installation: the CreateModel operations already did
                # all the work, so no data migration is needed.
                print("Fresh installation detected - creating optimized schema")

            else:
                # Existing installation: replay the data steps the original
                # migrations would have run.
                print("Existing data detected - running compatibility migrations")

                # Example: Populate fields that were added in separate migrations
                Post.objects.filter(status__isnull=True).update(status='draft')
                Post.objects.filter(author__isnull=True).update(author_id=1)

        return conditional_squash_migration

    @staticmethod
    def optimize_squashed_operations():
        """Return a function that folds ``AddField`` into a preceding ``CreateModel``.

        Returns:
            ``optimize_operations(original_operations) -> list``. The returned
            list keeps the original relative order of operations. An
            ``AddField`` is merged only when the operation immediately before
            it (after earlier merges) is the ``CreateModel`` of the same
            model; model names are compared case-insensitively. Every other
            operation is passed through untouched, so nothing is dropped and
            no operation is moved across a data migration or another model's
            operations.
        """

        def merge_field_into_create(create_op, add_field_op):
            """Return a new CreateModel that also declares the added field."""

            # Build a fresh operation so the caller's objects stay unchanged.
            return type(create_op)(
                name=create_op.name,
                fields=[*create_op.fields, (add_field_op.name, add_field_op.field)],
                options=create_op.options,
                bases=create_op.bases,
                managers=create_op.managers,
            )

        def optimize_operations(original_operations):
            """Optimize a list of migration operations without reordering it."""

            optimized = []

            for operation in original_operations:
                previous = optimized[-1] if optimized else None

                # Only an AddField that directly follows its own model's
                # CreateModel can be folded without reordering anything.
                can_fold = (
                    previous is not None
                    and type(operation).__name__ == 'AddField'
                    and type(previous).__name__ == 'CreateModel'
                    and previous.name.lower() == operation.model_name.lower()
                )

                if can_fold:
                    optimized[-1] = merge_field_into_create(previous, operation)
                else:
                    optimized.append(operation)

            return optimized

        return optimize_operations

Production Squashing Strategies

Safe Production Squashing

class ProductionSquashingStrategy:
    """Checklists and templates for squashing migrations in production."""

    @staticmethod
    def create_production_squashing_plan():
        """Return the production squashing plan as a dict of ordered step lists.

        The keys, in order, are ``pre_squashing``, ``squashing_process``,
        ``deployment`` and ``post_deployment``.
        """

        before = [
            "1. Audit all environments to ensure migrations are synchronized",
            "2. Create comprehensive backup of all databases",
            "3. Document current migration state across all environments",
            "4. Freeze new migration creation during squashing process",
            "5. Notify all team members of squashing timeline",
            "6. Prepare rollback procedures"
        ]
        during = [
            "1. Create squashed migration in development",
            "2. Test squashed migration on copy of production data",
            "3. Validate that squashed migration produces identical schema",
            "4. Test rollback procedures with squashed migration",
            "5. Deploy squashed migration to staging environment",
            "6. Perform full application testing in staging"
        ]
        rollout = [
            "1. Schedule maintenance window for production deployment",
            "2. Apply squashed migration to production",
            "3. Verify schema integrity after deployment",
            "4. Run application smoke tests",
            "5. Monitor application performance and error rates",
            "6. Clean up old migration files after successful deployment"
        ]
        after = [
            "1. Update deployment documentation",
            "2. Notify team of successful squashing",
            "3. Resume normal migration development process",
            "4. Archive old migration backups after retention period",
            "5. Update CI/CD pipelines if needed"
        ]

        return {
            'pre_squashing': before,
            'squashing_process': during,
            'deployment': rollout,
            'post_deployment': after
        }

    @staticmethod
    def create_zero_downtime_squashing():
        """Return a function describing a two-phase, zero-downtime squash.

        The returned function takes no arguments and returns a
        ``(phase1_migration, phase2_steps)`` tuple: example migration source
        text and the list of follow-up steps.
        """

        def zero_downtime_squash():
            """Build the phase-1 template and the phase-2 checklist."""

            # Phase 1: squashed migration shipped next to the old migrations.
            # NOTE(review): the template both depends on and replaces
            # 0010_latest_individual_migration and has no operations - confirm
            # this loads and that fresh databases still get a schema.
            template = '''
class Migration(migrations.Migration):
    """
    Phase 1: Squashed migration that coexists with old migrations
    This migration is marked as applied but doesn't actually run
    """
    
    dependencies = [
        ('blog', '0010_latest_individual_migration'),
    ]
    
    # Mark as already applied since schema already exists
    operations = []
    
    # This migration replaces the old ones but doesn't change schema
    replaces = [
        ('blog', '0001_initial'),
        ('blog', '0002_add_author'),
        # ... other migrations
        ('blog', '0010_latest_individual_migration'),
    ]
'''

            # Phase 2: drop the old files once phase 1 is applied everywhere.
            follow_up = [
                "1. Verify squashed migration is applied in all environments",
                "2. Remove old migration files from codebase",
                "3. Deploy code without old migration files",
                "4. Verify application continues to work normally"
            ]

            return template, follow_up

        return zero_downtime_squash

    @staticmethod
    def handle_squashing_conflicts():
        """Return known squashing conflicts mapped to a description and solution steps.

        Each value is a dict with a ``description`` string and a ``solution``
        list of ordered steps.
        """

        dependency = {
            'description': 'External apps depend on migrations being squashed',
            'solution': [
                "1. Identify all external dependencies",
                "2. Update external app migrations to depend on squashed migration",
                "3. Coordinate deployment across all affected apps",
                "4. Test integration between apps after squashing"
            ]
        }
        data_migration = {
            'description': 'Data migrations have complex interdependencies',
            'solution': [
                "1. Analyze data migration dependencies carefully",
                "2. Combine data migrations in correct order",
                "3. Test combined data migration on production-like data",
                "4. Consider keeping complex data migrations separate"
            ]
        }
        schema = {
            'description': 'Schema changes conflict when combined',
            'solution': [
                "1. Identify conflicting schema operations",
                "2. Resolve conflicts by choosing final desired state",
                "3. Test that final schema matches expectations",
                "4. Verify all constraints and indexes are correct"
            ]
        }
        rollback = {
            'description': 'Squashed migration cannot be safely rolled back',
            'solution': [
                "1. Document that squashed migration is not reversible",
                "2. Create separate rollback procedures if needed",
                "3. Ensure backup and restore procedures are available",
                "4. Test rollback procedures in staging environment"
            ]
        }

        return {
            'dependency_conflicts': dependency,
            'data_migration_conflicts': data_migration,
            'schema_conflicts': schema,
            'rollback_conflicts': rollback
        }

# Automated squashing tools
class AutomatedSquashingTools:
    """Tools for automating migration squashing."""

    @staticmethod
    def create_squashing_script():
        """Return the text of a bash script that automates the squashing process.

        The script text is only returned; it is neither written nor executed
        here. It expects three arguments: app name, start migration and end
        migration.
        """

        script_content = '''#!/bin/bash
# Automated migration squashing script

set -e

APP_NAME=$1
START_MIGRATION=$2
END_MIGRATION=$3

if [ -z "$APP_NAME" ] || [ -z "$START_MIGRATION" ] || [ -z "$END_MIGRATION" ]; then
    echo "Usage: $0 <app_name> <start_migration> <end_migration>"
    exit 1
fi

echo "Starting migration squashing for $APP_NAME: $START_MIGRATION to $END_MIGRATION"

# Backup existing migrations
echo "Creating backup of existing migrations..."
cp -r $APP_NAME/migrations $APP_NAME/migrations_backup_$(date +%Y%m%d_%H%M%S)

# Check current migration state
echo "Checking current migration state..."
python manage.py showmigrations $APP_NAME

# Create squashed migration
echo "Creating squashed migration..."
python manage.py squashmigrations $APP_NAME $START_MIGRATION $END_MIGRATION

# Test squashed migration
echo "Testing squashed migration on test database..."
python manage.py migrate --database=test

# Validate schema
echo "Validating schema..."
python manage.py sqlmigrate $APP_NAME $(ls $APP_NAME/migrations/*squashed*.py | head -1 | sed 's/.*\\///;s/\\.py//')

echo "Squashing completed successfully!"
echo "Please review the squashed migration before deploying to production."
'''

        return script_content

    @staticmethod
    def create_squashing_validator():
        """Return a function that validates a squashed migration.

        Returns:
            ``validate_squashed_migration(app_label, squashed_migration_name,
            loader=None) -> dict`` with keys ``valid`` (bool), ``errors`` and
            ``warnings`` (lists of str). ``valid`` is False whenever
            ``errors`` is non-empty.
        """

        def validate_squashed_migration(app_label, squashed_migration_name, loader=None):
            """Validate that the named squashed migration is consistent.

            Args:
                app_label: App that owns the squashed migration.
                squashed_migration_name: Name of the squashed migration.
                loader: Optional migration loader to inspect. When omitted, a
                    ``MigrationLoader`` for the default connection is built.
            """

            if loader is None:
                # Imported lazily so Django is only needed when no loader is
                # supplied by the caller.
                from django.db import connection
                from django.db.migrations.loader import MigrationLoader

                loader = MigrationLoader(connection)

            validation_results = {
                'valid': True,
                'errors': [],
                'warnings': []
            }
            errors = validation_results['errors']
            warnings = validation_results['warnings']

            try:
                squashed_migration = loader.get_migration(app_label, squashed_migration_name)

                # Migrations are looked up both in the graph and, when the
                # loader exposes it, among the migrations found on disk, so a
                # replaced migration absent from the graph is still found.
                known_migrations = set(loader.graph.nodes)
                known_migrations.update(getattr(loader, 'disk_migrations', None) or ())

                # A squashed migration must list what it replaces.
                replaces = getattr(squashed_migration, 'replaces', None) or []
                if not replaces:
                    errors.append("Squashed migration missing 'replaces' attribute")

                for replaced_app, replaced_name in replaces:
                    if (replaced_app, replaced_name) not in known_migrations:
                        warnings.append(
                            f"Replaced migration {replaced_app}.{replaced_name} not found"
                        )

                for dep_app, dep_name in squashed_migration.dependencies:
                    if (dep_app, dep_name) not in known_migrations:
                        errors.append(f"Dependency {dep_app}.{dep_name} not found")

                for operation in squashed_migration.operations:
                    if hasattr(operation, 'reversible') and not operation.reversible:
                        warnings.append(
                            f"Operation {operation.__class__.__name__} is not reversible"
                        )

            except Exception as e:
                # Best-effort validator: report the failure instead of raising.
                errors.append(f"Error loading squashed migration: {e}")

            # Any recorded error makes the migration invalid.
            validation_results['valid'] = not errors

            return validation_results

        return validate_squashed_migration

Migration squashing is a powerful technique for maintaining clean migration histories and improving performance. Proper planning, testing, and deployment strategies ensure that squashing enhances rather than complicates your Django project's migration management.