Deployment

Scaling and Load Balancing

Scaling Django applications requires strategic planning for handling increased traffic, data growth, and user demands. This chapter covers horizontal and vertical scaling strategies, load balancing configurations, auto-scaling implementations, and performance optimization techniques for high-traffic Django applications.

Scaling Fundamentals

Vertical vs Horizontal Scaling

Vertical Scaling (Scale Up)

  • Increase server resources (CPU, RAM, storage)
  • Simpler to implement but has hardware limits
  • Single point of failure
  • Cost increases exponentially

Horizontal Scaling (Scale Out)

  • Add more servers to distribute load
  • Better fault tolerance and unlimited growth potential
  • Requires application architecture changes
  • More complex but cost-effective at scale

Django Scaling Challenges

# Common scaling bottlenecks in Django applications
# (each default below works fine on a single server but becomes a
# bottleneck once the application runs on multiple hosts)

# 1. Database connections
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'OPTIONS': {
            'MAX_CONNS': 20,  # Limited connections per server
        },
    }
}

# 2. Session storage
SESSION_ENGINE = 'django.contrib.sessions.backends.db'  # Database bottleneck

# 3. Static file serving
STATIC_URL = '/static/'  # Server bandwidth limitation

# 4. Cache invalidation
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',  # Not shared
    }
}

# 5. File uploads and media storage
MEDIA_ROOT = '/path/to/media/'  # Local storage limitation

Load Balancing Strategies

Nginx Load Balancer Configuration

# /etc/nginx/nginx.conf
#
# NOTE: limit_req_zone and limit_conn_zone allocate shared memory and are
# only valid in the http context — declaring them inside a server block
# (as the original draft did) makes nginx refuse to start. They are
# therefore defined here at http level; the server block only *applies*
# the zones with limit_req / limit_conn.
limit_req_zone $binary_remote_addr zone=api:10m rate=100r/m;
limit_req_zone $binary_remote_addr zone=login:10m rate=5r/m;
limit_conn_zone $binary_remote_addr zone=conn_limit_per_ip:10m;

upstream django_app {
    # Load balancing methods
    least_conn;  # Route to server with fewest active connections
    # ip_hash;   # Route based on client IP (sticky sessions)
    # random;    # Random distribution

    # Backend servers (weight biases traffic; max_fails/fail_timeout mark
    # a server unavailable after repeated failures)
    server 10.0.1.10:8000 weight=3 max_fails=3 fail_timeout=30s;
    server 10.0.1.11:8000 weight=3 max_fails=3 fail_timeout=30s;
    server 10.0.1.12:8000 weight=2 max_fails=3 fail_timeout=30s;
    server 10.0.1.13:8000 backup;  # Only used when all others are down

    # Upstream keepalive connection pool
    keepalive 32;
    keepalive_requests 100;
    keepalive_timeout 60s;
}

server {
    listen 80;
    server_name yourdomain.com;

    # Per-IP concurrent connection cap (zone declared at http level above)
    limit_conn conn_limit_per_ip 10;

    # Static files (served directly by Nginx)
    location /static/ {
        alias /var/www/static/;
        expires 1y;
        add_header Cache-Control "public, immutable";

        # Compression
        gzip on;
        gzip_types text/css application/javascript image/svg+xml;

        # Fall back to the static file servers if not on local disk
        try_files $uri @static_fallback;
    }

    location @static_fallback {
        proxy_pass http://static_servers;
    }

    # API endpoints with rate limiting
    location /api/ {
        limit_req zone=api burst=20 nodelay;
        proxy_pass http://django_app;
        include /etc/nginx/proxy_params;

        # Retry the next upstream on errors/timeouts (at most 3 tries,
        # 10s total) so a single failing backend is transparent to clients
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503;
        proxy_next_upstream_tries 3;
        proxy_next_upstream_timeout 10s;
    }

    # Login endpoints with stricter rate limiting
    location /auth/login/ {
        limit_req zone=login burst=3 nodelay;
        proxy_pass http://django_app;
        include /etc/nginx/proxy_params;
    }

    # Main application
    location / {
        proxy_pass http://django_app;
        include /etc/nginx/proxy_params;

        # HTTP/1.1 with an empty Connection header is required for the
        # upstream keepalive pool to be used
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;

        # Buffering
        proxy_buffering on;
        proxy_buffer_size 4k;
        proxy_buffers 8 4k;
        proxy_busy_buffers_size 8k;
    }

    # Health check endpoint
    location /health/ {
        proxy_pass http://django_app;
        access_log off;

        # Custom health check headers
        proxy_set_header X-Health-Check "true";
    }
}

# Separate upstream for static files
upstream static_servers {
    server 10.0.2.10:80;
    server 10.0.2.11:80;
}

HAProxy Configuration

# /etc/haproxy/haproxy.cfg
global
    daemon
    chroot /var/lib/haproxy
    # Admin socket for runtime inspection/control (e.g. "show stat")
    stats socket /run/haproxy/admin.sock mode 660 level admin
    stats timeout 30s
    user haproxy
    group haproxy
    
    # SSL/TLS configuration
    ssl-default-bind-ciphers ECDHE+AESGCM:ECDHE+CHACHA20:RSA+AESGCM:RSA+AES:!aNULL:!MD5:!DSS
    ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets

defaults
    mode http
    timeout connect 5000ms
    timeout client 50000ms
    timeout server 50000ms
    option httplog
    option dontlognull
    # Re-dispatch a session to another server if its assigned server is down
    option redispatch
    retries 3
    
    # Health checks
    option httpchk GET /health/
    http-check expect status 200

# Statistics page
# NOTE(review): "stats admin if TRUE" enables admin actions for everyone on
# port 8404 — confirm this port is firewalled to operators only.
frontend stats
    bind *:8404
    stats enable
    stats uri /stats
    stats refresh 30s
    stats admin if TRUE

# Frontend configuration
frontend django_frontend
    bind *:80
    bind *:443 ssl crt /etc/ssl/certs/yourdomain.com.pem
    
    # Redirect HTTP to HTTPS
    redirect scheme https if !{ ssl_fc }
    
    # Rate limiting: track per-IP request rate over a 10s window and
    # reject clients exceeding 20 requests in that window
    stick-table type ip size 100k expire 30s store http_req_rate(10s)
    http-request track-sc0 src
    http-request reject if { sc_http_req_rate(0) gt 20 }
    
    # Route to backend
    default_backend django_backend

# Backend configuration
backend django_backend
    balance roundrobin
    
    # Health checks
    option httpchk GET /health/
    http-check expect status 200
    
    # Servers (maxconn caps concurrent connections per server)
    server web1 10.0.1.10:8000 check weight 100 maxconn 100
    server web2 10.0.1.11:8000 check weight 100 maxconn 100
    server web3 10.0.1.12:8000 check weight 80 maxconn 80
    server web4 10.0.1.13:8000 check backup
    
    # Connection settings
    # NOTE(review): "option httpclose" is a legacy directive; modern HAProxy
    # releases favor "option http-server-close" — confirm which is intended.
    option httpclose
    # Add X-Forwarded-For so Django sees the real client IP
    option forwardfor
    
    # Timeouts (override the values from the defaults section)
    timeout server 30s
    timeout connect 5s

Database Scaling

Read Replicas Configuration

# settings/database_scaling.py
import os

# Master-slave database configuration
# 'default' is the write master; the replicas share the same database name
# but use dedicated read-only credentials and their own hosts.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': os.environ.get('DB_NAME'),
        'USER': os.environ.get('DB_USER'),
        'PASSWORD': os.environ.get('DB_PASSWORD'),
        'HOST': os.environ.get('DB_MASTER_HOST'),
        'PORT': os.environ.get('DB_PORT', '5432'),
        'OPTIONS': {
            'sslmode': 'require',  # refuse unencrypted connections
        },
        'CONN_MAX_AGE': 600,  # reuse connections for up to 10 minutes
    },
    'read_replica_1': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': os.environ.get('DB_NAME'),
        'USER': os.environ.get('DB_READ_USER'),
        'PASSWORD': os.environ.get('DB_READ_PASSWORD'),
        'HOST': os.environ.get('DB_REPLICA_1_HOST'),
        'PORT': os.environ.get('DB_PORT', '5432'),
        'OPTIONS': {
            'sslmode': 'require',
        },
        'CONN_MAX_AGE': 600,
    },
    'read_replica_2': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': os.environ.get('DB_NAME'),
        'USER': os.environ.get('DB_READ_USER'),
        'PASSWORD': os.environ.get('DB_READ_PASSWORD'),
        'HOST': os.environ.get('DB_REPLICA_2_HOST'),
        'PORT': os.environ.get('DB_PORT', '5432'),
        'OPTIONS': {
            'sslmode': 'require',
        },
        'CONN_MAX_AGE': 600,
    },
}

# Database routing
# Registers the router class (defined below) that decides, per query,
# which of the databases above to use.
DATABASE_ROUTERS = ['myproject.routers.DatabaseRouter']
# myproject/routers.py
import random

class DatabaseRouter:
    """Send all writes to the master and spread reads across the replicas.

    Session and admin data is always read from the master so those apps
    never observe replication lag.
    """

    def __init__(self):
        self.read_databases = ['read_replica_1', 'read_replica_2']

    def db_for_read(self, model, **hints):
        """Pick a replica for reads; sessions/admin always hit the master."""
        if model._meta.app_label in ('sessions', 'admin'):
            return 'default'  # Always read sessions from master
        # Spread remaining reads uniformly over the replicas.
        return random.choice(self.read_databases)

    def db_for_write(self, model, **hints):
        """Every write targets the master database."""
        return 'default'

    def allow_relation(self, obj1, obj2, **hints):
        """Permit relations between objects living in our known databases."""
        known = {'default', 'read_replica_1', 'read_replica_2'}
        if {obj1._state.db, obj2._state.db} <= known:
            return True
        return None

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        """Run migrations only against the master."""
        return db == 'default'

# Advanced database router with load balancing
class LoadBalancedDatabaseRouter:
    """Database router that weights reads across healthy replicas.

    Writes and migrations always target the master ('default'). Reads are
    distributed over replicas proportionally to their configured weight,
    skipping any replica currently marked failed and falling back to the
    master when none remain.
    """

    def __init__(self):
        self.read_databases = ['read_replica_1', 'read_replica_2']
        self.database_weights = {
            'read_replica_1': 3,
            'read_replica_2': 2,
        }
        self.failed_databases = set()

    def get_read_database(self):
        """Return a weighted-random healthy replica, or the master if none."""
        healthy = [
            db for db in self.read_databases
            if db not in self.failed_databases
        ]
        if not healthy:
            # Every replica is down: serve reads from the master.
            return 'default'

        weights = [self.database_weights.get(db, 1) for db in healthy]
        (selected,) = random.choices(healthy, weights=weights)
        return selected

    def db_for_read(self, model, **hints):
        """Route reads, keeping critical apps and fresh writes on the master."""
        if model._meta.app_label in ('sessions', 'admin', 'auth'):
            return 'default'

        # Read-after-write consistency: instances flagged as recently
        # written must be re-read from the master.
        if hasattr(hints.get('instance'), '_recently_written'):
            return 'default'

        return self.get_read_database()

    def db_for_write(self, model, **hints):
        """Every write targets the master."""
        return 'default'

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        """Run migrations only against the master."""
        return db == 'default'

Database Connection Pooling

# settings/connection_pooling.py
import os

# PgBouncer configuration
# Django connects to PgBouncer (default port 6432), which multiplexes a
# small pool of real PostgreSQL connections behind it.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': os.environ.get('DB_NAME'),
        'USER': os.environ.get('DB_USER'),
        'PASSWORD': os.environ.get('DB_PASSWORD'),
        'HOST': os.environ.get('PGBOUNCER_HOST', 'localhost'),
        'PORT': os.environ.get('PGBOUNCER_PORT', '6432'),
        'OPTIONS': {
            'sslmode': 'require',
            'application_name': 'django-app',  # visible in pg_stat_activity
        },
        'CONN_MAX_AGE': 0,  # Disable Django's connection pooling
    }
}

# Alternative: Django-pool for connection pooling
# NOTE: this reassignment *replaces* the PgBouncer configuration above —
# the two blocks are shown as alternatives; a real settings file keeps
# exactly one DATABASES definition.
DATABASES = {
    'default': {
        'ENGINE': 'django_pool.backends.postgresql',
        'NAME': os.environ.get('DB_NAME'),
        'USER': os.environ.get('DB_USER'),
        'PASSWORD': os.environ.get('DB_PASSWORD'),
        'HOST': os.environ.get('DB_HOST'),
        'PORT': os.environ.get('DB_PORT', '5432'),
        'OPTIONS': {
            'MAX_CONNS': 20,  # pool upper bound per process
            'MIN_CONNS': 5,   # connections kept warm
            'sslmode': 'require',
        },
    }
}

Caching Strategies

Multi-Level Caching

# settings/caching.py
import os

# Multi-level caching configuration
CACHES = {
    # L1 Cache: Local memory (fastest)
    # Per-process and not shared between servers, so entries are kept short-lived.
    'local': {
        'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
        'LOCATION': 'local-cache',
        'TIMEOUT': 60,  # 1 minute
        'OPTIONS': {
            'MAX_ENTRIES': 1000,
        },
    },
    
    # L2 Cache: Redis (shared across servers)
    # The ShardClient distributes keys across the listed Redis nodes.
    'default': {
        'BACKEND': 'django_redis.cache.RedisCache',
        'LOCATION': [
            f"redis://{os.environ.get('REDIS_HOST_1', 'localhost')}:6379/1",
            f"redis://{os.environ.get('REDIS_HOST_2', 'localhost')}:6380/1",
            f"redis://{os.environ.get('REDIS_HOST_3', 'localhost')}:6381/1",
        ],
        'OPTIONS': {
            'CLIENT_CLASS': 'django_redis.client.ShardClient',
            'CONNECTION_POOL_KWARGS': {
                'max_connections': 50,
                'retry_on_timeout': True,
            },
            'SERIALIZER': 'django_redis.serializers.json.JSONSerializer',
            'COMPRESSOR': 'django_redis.compressors.zlib.ZlibCompressor',
        },
        'KEY_PREFIX': 'myapp',
        'VERSION': 1,
        'TIMEOUT': 300,  # 5 minutes
    },
    
    # L3 Cache: Database cache (persistent)
    # Requires the cache table to exist (manage.py createcachetable).
    'database': {
        'BACKEND': 'django.core.cache.backends.db.DatabaseCache',
        'LOCATION': 'cache_table',
        'TIMEOUT': 3600,  # 1 hour
        'OPTIONS': {
            'MAX_ENTRIES': 10000,
        },
    },
    
    # Session cache
    # Dedicated Redis database so flushing the app cache never logs users out.
    'sessions': {
        'BACKEND': 'django_redis.cache.RedisCache',
        'LOCATION': f"redis://{os.environ.get('REDIS_SESSION_HOST', 'localhost')}:6379/2",
        'OPTIONS': {
            'CLIENT_CLASS': 'django_redis.client.DefaultClient',
        },
        'KEY_PREFIX': 'sessions',
        'TIMEOUT': 86400,  # 24 hours
    },
}

# Cache middleware
# Ordering matters: UpdateCacheMiddleware must be first in the list and
# FetchFromCacheMiddleware last, so the full response cycle is cached.
MIDDLEWARE = [
    'django.middleware.cache.UpdateCacheMiddleware',
    # ... other middleware ...
    'django.middleware.cache.FetchFromCacheMiddleware',
]

CACHE_MIDDLEWARE_ALIAS = 'default'
CACHE_MIDDLEWARE_SECONDS = 600
CACHE_MIDDLEWARE_KEY_PREFIX = 'middleware'

# Session configuration
# Store sessions in the dedicated Redis cache defined above.
SESSION_ENGINE = 'django.contrib.sessions.backends.cache'
SESSION_CACHE_ALIAS = 'sessions'

Custom Cache Backend

# cache/backends.py
from django.core.cache.backends.base import BaseCache
from django.core.cache import caches
import time

class MultiLevelCache(BaseCache):
    """
    Multi-level cache backend that reads through L1 (local memory) to
    L2 (shared Redis) and writes through to both.

    Fixes over the naive implementation:
      * uses a sentinel instead of ``is not None`` / ``!= default`` checks,
        so falsy values (0, '', False) and an explicitly cached ``None``
        are still served from — and promoted into — the L1 cache;
      * treats ``timeout=0`` (Django's "expire immediately") correctly
        instead of collapsing it to the default timeout via ``or``, and
        tolerates a ``None`` default timeout ("never expire").
    """

    # Sentinel distinguishing "key absent" from any cached value.
    _MISSING = object()

    def __init__(self, server, params):
        super().__init__(server, params)
        self.l1_cache = caches['local']    # fast, per-process
        self.l2_cache = caches['default']  # authoritative, shared
        self.l1_timeout = 60  # cap on how long L1 may lag behind L2

    def get(self, key, default=None, version=None):
        """Return the cached value, promoting L2 hits into L1."""
        value = self.l1_cache.get(key, self._MISSING, version)
        if value is not self._MISSING:
            return value

        value = self.l2_cache.get(key, self._MISSING, version)
        if value is self._MISSING:
            return default

        # Promote to L1 for faster subsequent reads.
        self.l1_cache.set(key, value, self.l1_timeout, version)
        return value

    def set(self, key, value, timeout=None, version=None):
        """Write through to L2 (authoritative) and L1 (fast path)."""
        self.l2_cache.set(key, value, timeout, version)

        effective = self.default_timeout if timeout is None else timeout
        if effective is None:
            # "Never expire" requested: still bound the L1 copy.
            l1_timeout = self.l1_timeout
        else:
            l1_timeout = min(effective, self.l1_timeout)
        self.l1_cache.set(key, value, l1_timeout, version)

    def delete(self, key, version=None):
        """Invalidate the key in both levels."""
        self.l1_cache.delete(key, version)
        self.l2_cache.delete(key, version)

    def clear(self):
        """Flush both levels."""
        self.l1_cache.clear()
        self.l2_cache.clear()

# Cache warming utility
class CacheWarmer:
    """Pre-populates the shared cache with frequently requested objects."""

    def __init__(self):
        self.cache = caches['default']

    def warm_user_data(self, user_ids):
        """Cache a lightweight dict for each of the given user ids (1h TTL)."""
        from django.contrib.auth.models import User

        queryset = User.objects.filter(id__in=user_ids).select_related('profile')
        for user in queryset:
            payload = {
                'id': user.id,
                'username': user.username,
                'email': user.email,
                'profile': getattr(user, 'profile', None),
            }
            self.cache.set(f'user:{user.id}', payload, timeout=3600)

    def warm_popular_content(self):
        """Cache the 100 most-viewed published articles (30min TTL)."""
        from myapp.models import Article

        top_articles = Article.objects.filter(
            is_published=True
        ).order_by('-view_count')[:100]

        for article in top_articles:
            self.cache.set(f'article:{article.id}', article, timeout=1800)

Auto-Scaling Implementation

AWS Auto Scaling

# aws/autoscaling.py
import boto3
import json

class DjangoAutoScaler:
    """Auto-scaling manager for Django applications on AWS.

    Wraps the Auto Scaling, CloudWatch and ELBv2 APIs to create launch
    templates, auto scaling groups, target-tracking policies and alarms.
    Hard-coded ids (security group, subnets, ARNs) are placeholders to be
    replaced per environment.
    """

    def __init__(self, region='us-east-1'):
        self.autoscaling = boto3.client('autoscaling', region_name=region)
        self.cloudwatch = boto3.client('cloudwatch', region_name=region)
        self.elbv2 = boto3.client('elbv2', region_name=region)

    def create_launch_template(self, template_name, ami_id, instance_type):
        """Create a launch template for auto scaling.

        Returns the new launch template's id.
        """
        import base64
        import textwrap

        ec2 = boto3.client('ec2')

        # Dedent so the instance receives a script with no leading
        # whitespace (the shebang must start at column 0).
        user_data = textwrap.dedent("""\
            #!/bin/bash
            yum update -y
            yum install -y docker
            service docker start
            usermod -a -G docker ec2-user

            # Pull and run Django application
            docker run -d --name django-app \\
                --env-file /opt/django/.env \\
                -p 8000:8000 \\
                your-registry/django-app:latest
            """)

        response = ec2.create_launch_template(
            LaunchTemplateName=template_name,
            LaunchTemplateData={
                'ImageId': ami_id,
                'InstanceType': instance_type,
                'SecurityGroupIds': ['sg-12345678'],
                'IamInstanceProfile': {
                    'Name': 'django-instance-profile'
                },
                # Unlike run_instances, the CreateLaunchTemplate API
                # requires the caller to base64-encode UserData.
                'UserData': base64.b64encode(
                    user_data.encode('utf-8')
                ).decode('ascii'),
                'TagSpecifications': [
                    {
                        'ResourceType': 'instance',
                        'Tags': [
                            {'Key': 'Name', 'Value': 'django-auto-scaled'},
                            {'Key': 'Environment', 'Value': 'production'},
                        ]
                    }
                ]
            }
        )
        return response['LaunchTemplate']['LaunchTemplateId']

    def create_auto_scaling_group(self, group_name, launch_template_id,
                                  target_group_arn, min_size=1, max_size=10, desired_capacity=2):
        """Create an auto scaling group behind the given target group.

        Uses ELB health checks with a 5-minute grace period so instances
        have time to boot before being judged unhealthy.
        """
        response = self.autoscaling.create_auto_scaling_group(
            AutoScalingGroupName=group_name,
            LaunchTemplate={
                'LaunchTemplateId': launch_template_id,
                'Version': '$Latest'
            },
            MinSize=min_size,
            MaxSize=max_size,
            DesiredCapacity=desired_capacity,
            TargetGroupARNs=[target_group_arn],
            VPCZoneIdentifier='subnet-12345,subnet-67890',
            HealthCheckType='ELB',
            HealthCheckGracePeriod=300,
            DefaultCooldown=300,
            Tags=[
                {
                    'Key': 'Name',
                    'Value': 'django-asg',
                    'PropagateAtLaunch': True,
                    'ResourceId': group_name,
                    'ResourceType': 'auto-scaling-group'
                }
            ]
        )
        return response

    def create_scaling_policies(self, group_name):
        """Create target-tracking scaling policies (CPU and request count)."""
        # Scale on average CPU utilization.
        # NOTE: ASG target tracking does not accept ScaleOutCooldown /
        # ScaleInCooldown inside TargetTrackingConfiguration (those belong
        # to Application Auto Scaling); EstimatedInstanceWarmup is the
        # equivalent knob here.
        scale_up_response = self.autoscaling.put_scaling_policy(
            AutoScalingGroupName=group_name,
            PolicyName=f'{group_name}-scale-up',
            PolicyType='TargetTrackingScaling',
            EstimatedInstanceWarmup=300,
            TargetTrackingConfiguration={
                'TargetValue': 70.0,
                'PredefinedMetricSpecification': {
                    'PredefinedMetricType': 'ASGAverageCPUUtilization'
                }
            }
        )

        # Scale on ALB requests per target (ResourceLabel is a placeholder).
        request_count_policy = self.autoscaling.put_scaling_policy(
            AutoScalingGroupName=group_name,
            PolicyName=f'{group_name}-request-count',
            PolicyType='TargetTrackingScaling',
            EstimatedInstanceWarmup=300,
            TargetTrackingConfiguration={
                'TargetValue': 1000.0,
                'PredefinedMetricSpecification': {
                    'PredefinedMetricType': 'ALBRequestCountPerTarget',
                    'ResourceLabel': 'app/django-alb/1234567890123456/targetgroup/django-tg/1234567890123456'
                }
            }
        )

        return scale_up_response, request_count_policy

    def create_custom_metrics_alarm(self, group_name):
        """Create CloudWatch alarms on Django-specific metrics.

        Both alarms trigger the group's scale-up policy (ARNs are
        placeholders to be replaced with real policy ARNs).
        """
        # Database connection alarm: the application is expected to publish
        # DatabaseConnections under the Django/Application namespace.
        self.cloudwatch.put_metric_alarm(
            AlarmName=f'{group_name}-db-connections',
            ComparisonOperator='GreaterThanThreshold',
            EvaluationPeriods=2,
            MetricName='DatabaseConnections',
            Namespace='Django/Application',
            Period=300,
            Statistic='Average',
            Threshold=80.0,
            ActionsEnabled=True,
            AlarmActions=[
                f'arn:aws:autoscaling:us-east-1:123456789012:scalingPolicy:policy-id:autoScalingGroupName/{group_name}:policyName/{group_name}-scale-up'
            ],
            AlarmDescription='Scale up when database connections are high',
            Unit='Count'
        )

        # Response time alarm on the load balancer's built-in metric.
        self.cloudwatch.put_metric_alarm(
            AlarmName=f'{group_name}-response-time',
            ComparisonOperator='GreaterThanThreshold',
            EvaluationPeriods=3,
            MetricName='TargetResponseTime',
            Namespace='AWS/ApplicationELB',
            Period=300,
            Statistic='Average',
            Threshold=2.0,
            ActionsEnabled=True,
            AlarmActions=[
                f'arn:aws:autoscaling:us-east-1:123456789012:scalingPolicy:policy-id:autoScalingGroupName/{group_name}:policyName/{group_name}-scale-up'
            ],
            AlarmDescription='Scale up when response time is high',
            Unit='Seconds'
        )

Kubernetes Horizontal Pod Autoscaler

# k8s/hpa.yaml
# Horizontal Pod Autoscaler: scales the django-app Deployment between 3 and
# 50 replicas based on CPU, memory, a per-pod custom metric and an external
# queue-length metric.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: django-hpa
  namespace: production
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: django-app
  minReplicas: 3
  maxReplicas: 50
  metrics:
  # CPU-based scaling
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  
  # Memory-based scaling
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  
  # Custom metrics scaling
  # (requires a metrics adapter, e.g. prometheus-adapter, to expose
  # django_requests_per_second through the custom metrics API)
  - type: Pods
    pods:
      metric:
        name: django_requests_per_second
      target:
        type: AverageValue
        averageValue: "100"
  
  # External metrics (e.g., queue length)
  - type: External
    external:
      metric:
        name: celery_queue_length
        selector:
          matchLabels:
            queue: "default"
      target:
        type: Value
        value: "50"

  # Scale down conservatively (max of 10% or 2 pods per minute, after a
  # 5-minute stabilization window); scale up aggressively (up to 50% or
  # 5 pods per minute).
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
      - type: Pods
        value: 2
        periodSeconds: 60
      selectPolicy: Min
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
      - type: Percent
        value: 50
        periodSeconds: 60
      - type: Pods
        value: 5
        periodSeconds: 60
      selectPolicy: Max

---
# Vertical Pod Autoscaler
# NOTE(review): "Auto" mode evicts and recreates pods to apply new resource
# requests; running VPA on cpu/memory alongside the HPA above (which also
# scales on cpu/memory) can produce conflicting decisions — confirm this
# pairing is intended.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: django-vpa
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: django-app
  updatePolicy:
    updateMode: "Auto"
  resourcePolicy:
    containerPolicies:
    - containerName: django
      minAllowed:
        cpu: 100m
        memory: 128Mi
      maxAllowed:
        cpu: 2
        memory: 2Gi
      controlledResources: ["cpu", "memory"]

Custom Metrics for Scaling

# monitoring/metrics.py
import time
import threading
from collections import defaultdict, deque
from django.core.cache import cache
from django.db import connection
import psutil

class ScalingMetricsCollector:
    """Collect request, system and backend metrics for auto-scaling decisions.

    Request counters are updated from middleware on every request, so the
    lock-protected critical sections are kept tiny: slow external probes
    (psutil, PostgreSQL, Redis, the session table) run *outside* the lock.
    The previous implementation held the lock across a blocking one-second
    ``psutil.cpu_percent(interval=1)`` call, stalling every in-flight
    request that tried to record metrics.
    """

    # Minute buckets older than this are discarded so the per-minute
    # counter dicts cannot grow without bound in a long-lived process.
    _RETENTION_MINUTES = 10

    def __init__(self):
        self.request_times = deque(maxlen=1000)  # recent response times (s)
        self.request_count = defaultdict(int)    # minute bucket -> requests
        self.error_count = defaultdict(int)      # minute bucket -> 4xx/5xx
        self.active_connections = 0
        self.lock = threading.Lock()

    def record_request(self, response_time, status_code, endpoint):
        """Record one request's latency and status code.

        ``endpoint`` is accepted for interface compatibility but not yet
        aggregated per-path.
        """
        current_minute = int(time.time() // 60)
        with self.lock:
            self.request_times.append(response_time)
            self.request_count[current_minute] += 1
            if status_code >= 400:
                self.error_count[current_minute] += 1
            self._prune_old_buckets(current_minute)

    def _prune_old_buckets(self, current_minute):
        """Drop counter buckets outside the retention window (lock held)."""
        cutoff = current_minute - self._RETENTION_MINUTES
        for counters in (self.request_count, self.error_count):
            for minute in [m for m in counters if m < cutoff]:
                del counters[minute]

    def get_scaling_metrics(self):
        """Return a snapshot of all metrics used for scaling decisions."""
        current_minute = int(time.time() // 60)

        # Snapshot in-process counters quickly while holding the lock...
        with self.lock:
            request_rate = self.request_count.get(current_minute, 0)
            error_total = self.error_count.get(current_minute, 0)
            recent_times = list(self.request_times)

        avg_response_time = (
            sum(recent_times) / len(recent_times) if recent_times else 0
        )
        error_rate = error_total / request_rate if request_rate > 0 else 0

        # ...then query slow external sources without blocking recorders.
        # interval=None is non-blocking: it reports CPU use since the
        # previous call (0.0 on the very first call).
        cpu_percent = psutil.cpu_percent(interval=None)
        memory_percent = psutil.virtual_memory().percent
        db_connections = self.get_db_connection_count()
        cache_stats = self.get_cache_stats()

        return {
            'timestamp': time.time(),
            'request_rate': request_rate,
            'avg_response_time': avg_response_time,
            'error_rate': error_rate,
            'cpu_percent': cpu_percent,
            'memory_percent': memory_percent,
            'db_connections': db_connections,
            'cache_hit_rate': cache_stats.get('hit_rate', 0),
            'active_users': self.get_active_user_count(),
        }

    def get_db_connection_count(self):
        """Return the number of active PostgreSQL backends (0 on failure)."""
        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    "SELECT count(*) FROM pg_stat_activity WHERE state = 'active'"
                )
                return cursor.fetchone()[0]
        except Exception:
            # Best-effort probe: a down database must not break metrics.
            return 0

    def get_cache_stats(self):
        """Return Redis keyspace hit/miss stats (hit_rate 0 on failure)."""
        try:
            from django_redis import get_redis_connection
            redis_conn = get_redis_connection("default")
            info = redis_conn.info()

            hits = info.get('keyspace_hits', 0)
            misses = info.get('keyspace_misses', 0)
            total = hits + misses

            return {
                'hits': hits,
                'misses': misses,
                'hit_rate': hits / total if total > 0 else 0,
            }
        except Exception:
            return {'hit_rate': 0}

    def get_active_user_count(self):
        """Approximate active users via unexpired sessions (0 on failure)."""
        try:
            from django.contrib.sessions.models import Session
            from django.utils import timezone

            return Session.objects.filter(
                expire_date__gte=timezone.now()
            ).count()
        except Exception:
            return 0

    def should_scale_up(self):
        """Return True when at least two scale-up thresholds are exceeded."""
        metrics = self.get_scaling_metrics()

        conditions = [
            metrics['cpu_percent'] > 80,
            metrics['memory_percent'] > 85,
            metrics['avg_response_time'] > 2.0,
            metrics['error_rate'] > 0.05,
            metrics['request_rate'] > 1000,
            metrics['db_connections'] > 80,
        ]

        # Require two simultaneous signals to avoid flapping on one noisy metric.
        return sum(conditions) >= 2

    def should_scale_down(self):
        """Return True only when every scale-down threshold is satisfied."""
        metrics = self.get_scaling_metrics()

        conditions = [
            metrics['cpu_percent'] < 30,
            metrics['memory_percent'] < 40,
            metrics['avg_response_time'] < 0.5,
            metrics['error_rate'] < 0.01,
            metrics['request_rate'] < 100,
        ]

        return all(conditions)

# Global metrics collector
# Module-level singleton shared by the middleware below. It is per-process,
# so each worker keeps its own counters.
scaling_metrics = ScalingMetricsCollector()

# Middleware to collect metrics
class ScalingMetricsMiddleware:
    """Times every request and feeds the result to the global collector."""

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        started = time.time()
        response = self.get_response(request)
        elapsed = time.time() - started

        scaling_metrics.record_request(
            response_time=elapsed,
            status_code=response.status_code,
            endpoint=request.path,
        )
        return response

The scaling and load balancing techniques in this chapter — load balancers, database read replicas, multi-level caching, and auto-scaling — give Django applications a practical foundation for handling high traffic loads efficiently.