Performance and Optimization

Query Optimization

Database queries are often the primary performance bottleneck in Django applications. This chapter covers comprehensive query optimization techniques, from eliminating N+1 queries to implementing advanced database optimization strategies that can improve application performance by orders of magnitude.

Understanding Django ORM Query Execution

Query Evaluation and Lazy Loading

Django QuerySets are lazy - they don't hit the database until you actually need the data:

# No database query executed yet: QuerySets are lazy, so each call below
# only builds up the SQL -- nothing touches the database.
articles = Article.objects.all()
published_articles = articles.filter(is_published=True)
recent_articles = published_articles.order_by('-created_at')

# Database query executed here: slicing plus iteration forces evaluation
# (the [:10] slice becomes a LIMIT 10 in the generated SQL)
for article in recent_articles[:10]:
    print(article.title)

# Another query executed here: count() issues a separate SELECT COUNT(*)
# rather than reusing the rows fetched above
count = recent_articles.count()

Query Execution Analysis

from django.db import connection
from django.test.utils import override_settings

@override_settings(DEBUG=True)
def analyze_queries():
    """Print the SQL and timing of every query a code block executes.

    DEBUG=True is required: Django only records executed queries on
    ``connection.queries`` while DEBUG is on.
    """
    # Reset query log so only the queries below are reported
    connection.queries_log.clear()
    
    # Execute your code
    articles = Article.objects.select_related('author').all()[:10]
    for article in articles:
        print(f"{article.title} by {article.author.name}")
    
    # Analyze queries: each log entry is a dict with 'sql' and 'time' keys
    print(f"Number of queries: {len(connection.queries)}")
    for query in connection.queries:
        print(f"Query: {query['sql']}")
        print(f"Time: {query['time']}s")

The N+1 Query Problem

The N+1 query problem is the most common performance issue in Django applications:

Identifying N+1 Queries

# BAD: N+1 Query Problem
def article_list_bad(request):
    """Anti-example: every related-object access below hits the database."""
    articles = Article.objects.all()[:10]  # 1 query
    
    for article in articles:
        # Each of these creates a new query!
        print(article.author.name)      # FK access: query 2, 3, 4...
        print(article.category.name)    # FK access: query 12, 13, 14...
        print(article.tags.count())     # M2M count: query 22, 23, 24...
    
    # Total: 1 + 10 + 10 + 10 = 31 queries!
    return render(request, 'articles/list.html', {'articles': articles})

# GOOD: Optimized Version
def article_list_good(request):
    articles = Article.objects.select_related(
        'author',
        'category'
    ).prefetch_related(
        'tags'
    ).all()[:10]  # Only 3 queries total!
    
    for article in articles:
        print(article.author.name)      # No additional query
        print(article.category.name)    # No additional query
        print(article.tags.count())     # No additional query
    
    return render(request, 'articles/list.html', {'articles': articles})

Advanced N+1 Detection

# Custom middleware to detect N+1 queries
import logging
from django.db import connection
from django.utils.deprecation import MiddlewareMixin

logger = logging.getLogger(__name__)

class N1QueryDetectionMiddleware(MiddlewareMixin):
    """Flag request/response cycles that look like N+1 query patterns.

    Works by counting queries logged during the request and grouping
    structurally-similar SQL. Only effective when query logging is active
    (DEBUG=True), since ``connection.queries`` stays empty otherwise.
    """

    def process_request(self, request):
        # Remember how many queries were already logged before this request
        request.initial_queries = len(connection.queries)
    
    def process_response(self, request, response):
        """Log a warning when this request ran many near-identical queries."""
        if hasattr(request, 'initial_queries'):
            num_queries = len(connection.queries) - request.initial_queries
            
            # Detect potential N+1 queries
            if num_queries > 10:
                similar_queries = self.find_similar_queries(
                    connection.queries[request.initial_queries:]
                )
                
                if similar_queries:
                    logger.warning(
                        f"Potential N+1 query detected on {request.path}: "
                        f"{num_queries} queries, {len(similar_queries)} similar patterns"
                    )
        
        return response
    
    def find_similar_queries(self, queries):
        """Find queries with similar patterns (potential N+1).

        Returns a dict mapping each normalized SQL pattern to its occurrence
        count, keeping only patterns seen more than once.
        """
        import re  # BUG FIX: `re` was used below but never imported

        patterns = {}
        for query in queries:
            # Normalize query by replacing numbers with placeholders so
            # "... WHERE id = 1" and "... WHERE id = 2" collapse together
            normalized = re.sub(r'\d+', 'X', query['sql'])
            patterns[normalized] = patterns.get(normalized, 0) + 1
        
        # Return patterns that appear more than once
        return {k: v for k, v in patterns.items() if v > 1}

Using select_related

select_related performs SQL JOINs to fetch related objects in a single query:

# Without select_related: 1 + N queries
articles = Article.objects.all()
for article in articles:
    print(article.author.name)  # New query for each article

# With select_related: 1 query (the author row arrives via a JOIN)
articles = Article.objects.select_related('author').all()
for article in articles:
    print(article.author.name)  # No additional query

Multiple Relations

# Select multiple related objects in a single JOINed query
articles = Article.objects.select_related(
    'author',
    'category',
    'author__profile'  # Follow foreign keys through multiple levels
).all()

# Generated SQL includes JOINs (illustrative; the real SQL uses INNER or
# LEFT JOIN depending on whether each foreign key is nullable):
# SELECT article.*, author.*, category.*, profile.*
# FROM article
# JOIN author ON article.author_id = author.id
# JOIN category ON article.category_id = category.id
# JOIN profile ON author.profile_id = profile.id
def get_articles_with_related(include_author=True, include_category=True):
    """Return an Article queryset, optionally JOINing the flagged relations."""
    # Pair each flag with the relation it enables, then keep the enabled ones
    # (order matters only for readability of the generated SQL).
    enabled_relations = [
        relation
        for flag, relation in ((include_author, 'author'),
                               (include_category, 'category'))
        if flag
    ]

    queryset = Article.objects.all()
    if enabled_relations:
        queryset = queryset.select_related(*enabled_relations)
    return queryset
import time
from django.test import TestCase

class SelectRelatedPerformanceTest(TestCase):
    """Benchmark the same access pattern with and without select_related.

    NOTE(review): these methods print timings rather than asserting, so
    they are demonstrations, not regression tests. ``connection.queries``
    is only populated when DEBUG is on (e.g. under @override_settings).
    """

    def setUp(self):
        # Create 100 articles, each with its own author, so the unoptimized
        # access pattern below triggers 1 + 100 queries.
        for i in range(100):
            author = Author.objects.create(name=f"Author {i}")
            Article.objects.create(
                title=f"Article {i}",
                author=author,
                content="Test content"
            )
    
    def test_without_select_related(self):
        from django.db import connection  # BUG FIX: not imported by this snippet

        start_time = time.time()
        
        articles = Article.objects.all()
        author_names = [article.author.name for article in articles]  # N extra queries
        
        end_time = time.time()
        print(f"Without select_related: {end_time - start_time:.4f}s")
        print(f"Queries executed: {len(connection.queries)}")
    
    def test_with_select_related(self):
        from django.db import connection  # BUG FIX: not imported by this snippet

        start_time = time.time()
        
        articles = Article.objects.select_related('author').all()
        author_names = [article.author.name for article in articles]  # no extra queries
        
        end_time = time.time()
        print(f"With select_related: {end_time - start_time:.4f}s")
        print(f"Queries executed: {len(connection.queries)}")

Using prefetch_related

prefetch_related handles many-to-many and reverse foreign key relationships:

# Without prefetch_related: 1 + N queries
articles = Article.objects.all()
for article in articles:
    tags = list(article.tags.all())  # New query for each article

# With prefetch_related: 2 queries (articles, then every tag for all of
# them at once; the rows are matched up in Python, not in SQL)
articles = Article.objects.prefetch_related('tags').all()
for article in articles:
    tags = list(article.tags.all())  # No additional query

Advanced Prefetch Patterns

from django.db.models import Prefetch

# Custom prefetch with filtering: only approved comments are fetched, and
# each comment's author is select_related'd inside the prefetch query itself
articles = Article.objects.prefetch_related(
    Prefetch(
        'comments',
        queryset=Comment.objects.filter(is_approved=True).select_related('author'),
        to_attr='approved_comments'  # stored as a plain list attribute, not a manager
    )
).all()

# Access prefetched data (no further queries in this loop)
for article in articles:
    for comment in article.approved_comments:
        print(f"Comment by {comment.author.name}: {comment.content}")

Nested Prefetching

# Prefetch related objects of related objects; each '__' hop adds one
# extra query, independent of how many rows are returned
articles = Article.objects.prefetch_related(
    'tags',
    'comments__author',
    'comments__replies'
).all()

# Multiple levels of prefetching, starting from the User side
users = User.objects.prefetch_related(
    'articles__tags',
    'articles__comments__author'
).all()

Prefetch with Annotations

from django.db.models import Count, Avg

# Prefetch with aggregations: each prefetched comment arrives already
# annotated with its reply_count, computed inside the prefetch query
articles = Article.objects.prefetch_related(
    Prefetch(
        'comments',
        queryset=Comment.objects.annotate(
            reply_count=Count('replies')
        ).select_related('author')
    )
).all()

Query Optimization Techniques

Using Only and Defer

Limit the fields retrieved from the database:

# Only retrieve specific fields. Caution: accessing any field NOT listed
# here later triggers one extra query per instance, so only() can backfire.
articles = Article.objects.only('title', 'slug', 'created_at').all()

# Defer expensive fields (the inverse: load everything EXCEPT these)
articles = Article.objects.defer('content', 'metadata').all()

# Combining with select_related: restrict the joined author row too
articles = Article.objects.select_related('author').only(
    'title', 'slug', 'author__name'
).all()

Database Functions and Aggregations

Perform calculations in the database instead of Python:

from django.db.models import Count, Sum, Avg, F, Case, When

# Aggregations: one dict of totals, computed entirely in the database
stats = Article.objects.aggregate(
    total_articles=Count('id'),
    avg_word_count=Avg('word_count'),
    total_views=Sum('view_count')
)

# Annotations: per-row computed columns.
# BUG FIX: aggregating over TWO multi-valued relations ('comments' and
# 'ratings') in one query multiplies the joined rows, which inflates
# Count('comments'); distinct=True restores the correct count.
articles = Article.objects.annotate(
    comment_count=Count('comments', distinct=True),
    avg_rating=Avg('ratings__score'),
    is_popular=Case(
        When(view_count__gt=1000, then=True),
        default=False
    )
).all()

# F expressions for database-level operations (no Python round-trip)
Article.objects.filter(view_count__gt=F('comment_count') * 10)

Bulk Operations

Use bulk operations for better performance:

from datetime import timedelta  # BUG FIX: used below but never imported

from django.utils import timezone  # BUG FIX: used below but never imported

# Bulk create: one INSERT per batch instead of 1000 individual saves
articles = [
    Article(title=f"Article {i}", content=f"Content {i}")
    for i in range(1000)
]
Article.objects.bulk_create(articles, batch_size=100)

# Bulk update: fetch instances, mutate them in Python, then write the
# changed fields back in batched UPDATEs.
# BUG FIX: the original called bulk_update on a filtered queryset with
# freshly constructed Article(id=..., ...) instances -- bulk_update takes
# *existing instances* whose attributes you have modified; constructing
# partial instances leaves every unlisted field unset and is misleading.
drafts = list(Article.objects.filter(is_published=False))
for article in drafts:
    article.view_count = 0  # example mutation
Article.objects.bulk_update(drafts, ['view_count'], batch_size=100)

# Bulk delete: a single DELETE statement (plus cascade handling)
Article.objects.filter(created_at__lt=timezone.now() - timedelta(days=365)).delete()

Raw SQL for Complex Queries

Sometimes raw SQL is more efficient:

# Complex query that's hard to express in ORM
articles = Article.objects.raw("""
    SELECT a.*, 
           COUNT(c.id) as comment_count,
           AVG(r.score) as avg_rating
    FROM articles_article a
    LEFT JOIN articles_comment c ON a.id = c.article_id
    LEFT JOIN articles_rating r ON a.id = r.article_id
    WHERE a.is_published = true
    GROUP BY a.id
    HAVING COUNT(c.id) > 5
    ORDER BY avg_rating DESC, comment_count DESC
    LIMIT 10
""")

# Using extra() for database functions
articles = Article.objects.extra(
    select={
        'comment_count': 'SELECT COUNT(*) FROM comments WHERE article_id = articles_article.id'
    }
).all()

Database Index Optimization

Identifying Missing Indexes

# Django management command to analyze slow queries
from django.core.management.base import BaseCommand
from django.db import connection

class Command(BaseCommand):
    help = 'Analyze slow queries and suggest indexes'
    
    def handle(self, *args, **options):
        """Report the ten slowest statements recorded by pg_stat_statements.

        Requires the PostgreSQL pg_stat_statements extension to be loaded.
        NOTE(review): on PostgreSQL 13+ these columns are named
        mean_exec_time / total_exec_time -- confirm the server version.
        """
        with connection.cursor() as cursor:
            # PostgreSQL: statements averaging over 100 ms, slowest first
            cursor.execute("""
                SELECT query, mean_time, calls, total_time
                FROM pg_stat_statements
                WHERE mean_time > 100
                ORDER BY mean_time DESC
                LIMIT 10
            """)
            
            slow_queries = cursor.fetchall()
            
            for query, mean_time, calls, total_time in slow_queries:
                self.stdout.write(f"Query: {query[:100]}...")  # truncate long SQL
                self.stdout.write(f"Mean time: {mean_time}ms")
                self.stdout.write(f"Calls: {calls}")
                self.stdout.write("---")

Creating Effective Indexes

# models.py
from django.db import models

class Article(models.Model):
    """Example model demonstrating effective indexing for common query patterns."""

    title = models.CharField(max_length=200)
    slug = models.SlugField(unique=True)
    # Added: the composite title/content index below referenced a 'content'
    # field that did not exist on the original model.
    content = models.TextField()
    author = models.ForeignKey(User, on_delete=models.CASCADE)
    category = models.ForeignKey(Category, on_delete=models.CASCADE)
    is_published = models.BooleanField(default=False)
    created_at = models.DateTimeField(auto_now_add=True)
    view_count = models.PositiveIntegerField(default=0)
    
    class Meta:
        # BUG FIX: the original assigned Meta.indexes TWICE, so the first
        # (single-column) list was silently discarded by the second
        # assignment. All indexes must live in one list.
        indexes = [
            # Single column indexes
            models.Index(fields=['is_published']),
            models.Index(fields=['created_at']),
            models.Index(fields=['view_count']),

            # Composite indexes for common query patterns:
            # filtering by published status and ordering by date
            models.Index(fields=['is_published', '-created_at']),
            # filtering by category and published status
            models.Index(fields=['category', 'is_published']),

            # NOTE(review): a plain btree index on (title, content) does NOT
            # provide PostgreSQL full-text search; that needs a GIN index on
            # a tsvector expression (created via RunSQL in a migration).
            models.Index(fields=['title', 'content']),
        ]

# Migration for adding indexes
from django.db import migrations, models

class Migration(migrations.Migration):
    # BUG FIX: CREATE INDEX CONCURRENTLY cannot run inside a transaction
    # block, and Django wraps each migration in one by default -- this
    # migration would fail without disabling atomicity.
    atomic = False

    dependencies = [
        ('articles', '0001_initial'),
    ]
    
    operations = [
        migrations.RunSQL(
            # PostgreSQL: partial index covering only published rows,
            # matching the common "published, newest first" query
            "CREATE INDEX CONCURRENTLY idx_published_articles "
            "ON articles_article (created_at DESC) "
            "WHERE is_published = true;",
            
            reverse_sql="DROP INDEX CONCURRENTLY idx_published_articles;"
        ),
        
        migrations.RunSQL(
            # PostgreSQL: GIN index for full-text search over title + content
            "CREATE INDEX CONCURRENTLY idx_article_search "
            "ON articles_article USING gin(to_tsvector('english', title || ' ' || content));",
            
            reverse_sql="DROP INDEX CONCURRENTLY idx_article_search;"
        ),
    ]

Index Usage Analysis

# Analyze index usage (PostgreSQL)
def analyze_index_usage():
    """Dump scan/read statistics for every index in the public schema."""
    with connection.cursor() as cursor:
        cursor.execute("""
            SELECT 
                schemaname,
                tablename,
                indexname,
                idx_tup_read,
                idx_tup_fetch,
                idx_scan
            FROM pg_stat_user_indexes
            WHERE schemaname = 'public'
            ORDER BY idx_scan DESC
        """)
        rows = cursor.fetchall()

    print("Index Usage Statistics:")
    for schema, table, index, tup_read, tup_fetch, scans in rows:
        print(f"{table}.{index}: {scans} scans, {tup_read} tuples read")

Query Performance Monitoring

Custom Query Profiler

import time
import logging
from django.db import connection
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def query_profiler(description="Query block"):
    """Context manager that logs query count and wall time for a code block.

    Requires DEBUG=True, otherwise ``connection.queries`` stays empty.
    BUG FIX: the stats are now computed in a ``finally`` clause so they are
    logged even when the wrapped block raises (the original silently skipped
    all logging on exceptions).
    """
    initial_queries = len(connection.queries)
    start_time = time.time()
    
    try:
        yield
    finally:
        end_time = time.time()
        num_queries = len(connection.queries) - initial_queries
        duration = end_time - start_time
        
        logger.info(
            f"{description}: {num_queries} queries in {duration:.4f}s"
        )
        
        # Log slow queries (only meaningful if the block took a while overall)
        if duration > 0.5:
            recent_queries = connection.queries[initial_queries:]
            for query in recent_queries:
                if float(query['time']) > 0.1:
                    logger.warning(f"Slow query ({query['time']}s): {query['sql'][:200]}...")

# Usage
def expensive_view(request):
    """Render the article list while logging its query count and timing."""
    with query_profiler("Article list with comments"):
        articles = Article.objects.select_related('author').prefetch_related(
            'comments__author'  # 2 prefetch queries: comments, then authors
        ).all()[:20]
        
        return render(request, 'articles.html', {'articles': articles})

Query Optimization Decorator

from functools import wraps

def optimize_queries(max_queries=10):
    """Decorator to ensure views don't exceed query limits"""
    def decorator(view_func):
        @wraps(view_func)
        def wrapper(request, *args, **kwargs):
            # Snapshot the query log length, run the view, then diff.
            queries_before = len(connection.queries)
            response = view_func(request, *args, **kwargs)
            num_queries = len(connection.queries) - queries_before

            # Over budget: warn, but never block the response itself.
            if num_queries > max_queries:
                logger.warning(
                    f"View {view_func.__name__} executed {num_queries} queries "
                    f"(limit: {max_queries})"
                )
            return response

        return wrapper
    return decorator

# Usage: warn in the logs whenever this view exceeds 5 queries
@optimize_queries(max_queries=5)
def article_list(request):
    articles = Article.objects.select_related('author').all()[:20]
    return render(request, 'articles.html', {'articles': articles})

Advanced Query Optimization Patterns

Query Result Caching

from django.core.cache import cache
from django.core.cache.utils import make_template_fragment_key

def get_popular_articles(limit=10):
    """Return the most-viewed published articles, cached for one hour."""
    cache_key = f'popular_articles_{limit}'
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    # Cache miss: run the query once and materialize it as a list so the
    # cached value is plain data, not a lazy queryset.
    popular = list(
        Article.objects.select_related('author', 'category')
        .filter(is_published=True)
        .order_by('-view_count')[:limit]
    )
    cache.set(cache_key, popular, 3600)  # 1 hour
    return popular

# Cache invalidation
# Cache invalidation
def invalidate_article_cache(article):
    """Drop every cache entry whose contents depend on *article*."""
    cache.delete_many([
        'popular_articles_10',
        'popular_articles_20',
        f'article_{article.id}',
        f'category_articles_{article.category_id}',
    ])

Database Connection Optimization

# settings.py
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        # BUG FIX: the original nested a second 'OPTIONS' dict inside
        # 'OPTIONS' and duplicated MAX_CONNS/MIN_CONNS. Django passes this
        # dict straight to the database driver, so it must be flat.
        # NOTE(review): MAX_CONNS/MIN_CONNS are understood by pooling
        # backends (e.g. django-db-geventpool), not by vanilla psycopg2 --
        # confirm against the pooling backend actually in use.
        'OPTIONS': {
            'MAX_CONNS': 20,
            'MIN_CONNS': 5,
            # passed through to the server session by psycopg2
            'options': '-c default_transaction_isolation=read_committed',
        },
        'CONN_MAX_AGE': 600,  # reuse connections for up to 10 minutes
        'CONN_HEALTH_CHECKS': True,  # verify a connection before reusing it
    }
}

# Custom database router for read/write splitting
class DatabaseRouter:
    """Route all reads to the 'read' alias and all writes to 'write'.

    Both aliases must be configured in settings.DATABASES, and the router
    registered in DATABASE_ROUTERS. NOTE(review): a complete router usually
    also defines allow_relation/allow_migrate; they are omitted here.
    """

    def db_for_read(self, model, **hints):
        """Reading from the read database."""
        return 'read'

    def db_for_write(self, model, **hints):
        """Writing to the write database."""
        return 'write'

Query Optimization Testing

from django.test import TestCase, override_settings
from django.test.utils import override_settings

class QueryOptimizationTest(TestCase):
    """Tests that pin exact query counts and detect N+1 regressions."""

    def setUp(self):
        # NOTE(review): create_test_data is not defined in this snippet --
        # it must be supplied elsewhere (fixtures/factories); confirm.
        self.create_test_data()
    
    def test_article_list_queries(self):
        """Test that article list doesn't have N+1 queries"""
        with self.assertNumQueries(3):  # articles, authors, tags
            articles = Article.objects.select_related('author').prefetch_related('tags').all()[:10]
            
            # Force evaluation -- querysets are lazy, so without this loop
            # no queries would run at all
            for article in articles:
                _ = article.author.name
                _ = list(article.tags.all())
    
    def test_query_performance(self):
        """Test query performance benchmarks"""
        import time
        
        start_time = time.time()
        
        # Your optimized query
        articles = Article.objects.select_related('author').prefetch_related('tags').all()[:100]
        list(articles)  # Force evaluation
        
        end_time = time.time()
        duration = end_time - start_time
        
        # Assert performance requirement
        self.assertLess(duration, 0.5, "Query took too long")
    
    @override_settings(DEBUG=True)
    def test_query_analysis(self):
        """Analyze queries for optimization opportunities"""
        import re  # BUG FIX: `re` was used below but never imported
        from django.db import connection
        
        connection.queries_log.clear()
        
        # Execute your view logic (intentionally unoptimized)
        articles = Article.objects.all()[:10]
        for article in articles:
            _ = article.author.name
        
        # Group structurally-similar queries by masking literal numbers
        similar_queries = {}
        for query in connection.queries:
            pattern = re.sub(r'\d+', 'N', query['sql'])
            similar_queries[pattern] = similar_queries.get(pattern, 0) + 1
        
        # More than 5 near-identical queries is a strong N+1 signal
        for pattern, count in similar_queries.items():
            if count > 5:
                self.fail(f"Potential N+1 query detected: {pattern} (count: {count})")

This comprehensive query optimization guide provides the foundation for building high-performance Django applications by eliminating database bottlenecks and implementing efficient data access patterns.