Database queries are often the primary performance bottleneck in Django applications. This chapter covers comprehensive query optimization techniques, from eliminating N+1 queries to implementing advanced database optimization strategies that can improve application performance by orders of magnitude.
Django QuerySets are lazy - they don't hit the database until you actually need the data:
# Building the chain performs no SQL at all -- QuerySets are lazy.
all_articles = Article.objects.all()
published = all_articles.filter(is_published=True)
newest_first = published.order_by('-created_at')
# Iterating over a slice forces evaluation -- the database is hit here.
for article in newest_first[:10]:
    print(article.title)
# count() runs its own SELECT COUNT(*) -- a second, separate query.
count = newest_first.count()
from django.db import connection
from django.test.utils import override_settings


@override_settings(DEBUG=True)
def analyze_queries():
    """Execute a sample query under DEBUG and print every SQL statement logged."""
    # Empty the per-connection log so only the statements below show up.
    connection.queries_log.clear()

    # Run the code under inspection.
    sample = Article.objects.select_related('author').all()[:10]
    for art in sample:
        print(f"{art.title} by {art.author.name}")

    # Report what actually reached the database.
    print(f"Number of queries: {len(connection.queries)}")
    for entry in connection.queries:
        print(f"Query: {entry['sql']}")
        print(f"Time: {entry['time']}s")
The N+1 query problem is the most common performance issue in Django applications:
# BAD: classic N+1 -- one query for the page, then one per row per relation.
def article_list_bad(request):
    """Render the article list while lazily touching every relation (slow)."""
    articles = Article.objects.all()[:10]  # query #1
    for article in articles:
        # Each attribute access below lazily fires its own SELECT.
        print(article.author.name)    # +1 query per article
        print(article.category.name)  # +1 query per article
        print(article.tags.count())   # +1 query per article
    # 1 + 10 + 10 + 10 = 31 queries for a ten-row page.
    return render(request, 'articles/list.html', {'articles': articles})
# GOOD: joins and prefetches collapse the page to a constant query count.
def article_list_good(request):
    """Render the article list with all relations loaded up front (3 queries)."""
    articles = (
        Article.objects
        .select_related('author', 'category')  # JOINed into the main query
        .prefetch_related('tags')              # one extra query for all tags
        .all()[:10]
    )
    for article in articles:
        print(article.author.name)    # already in memory
        print(article.category.name)  # already in memory
        print(article.tags.count())   # served from the prefetch cache
    return render(request, 'articles/list.html', {'articles': articles})
# Custom middleware to detect N+1 queries
import logging
import re  # BUG FIX: was missing, but find_similar_queries() uses re.sub

from django.db import connection
from django.utils.deprecation import MiddlewareMixin

logger = logging.getLogger(__name__)


class N1QueryDetectionMiddleware(MiddlewareMixin):
    """Log a warning when a request issues many near-identical queries.

    NOTE: ``connection.queries`` is only populated when DEBUG=True, so this
    middleware is a development aid, not a production monitor.
    """

    def process_request(self, request):
        # Remember the log size so we count only this request's queries.
        request.initial_queries = len(connection.queries)

    def process_response(self, request, response):
        if hasattr(request, 'initial_queries'):
            num_queries = len(connection.queries) - request.initial_queries
            # A high total query count is the first hint of an N+1 pattern.
            if num_queries > 10:
                similar_queries = self.find_similar_queries(
                    connection.queries[request.initial_queries:]
                )
                if similar_queries:
                    logger.warning(
                        f"Potential N+1 query detected on {request.path}: "
                        f"{num_queries} queries, {len(similar_queries)} similar patterns"
                    )
        return response

    def find_similar_queries(self, queries):
        """Find queries with similar patterns (potential N+1)"""
        patterns = {}
        for query in queries:
            # Normalize each statement by masking literal numbers, so
            # "WHERE id = 1" and "WHERE id = 2" collapse to one pattern.
            normalized = re.sub(r'\d+', 'X', query['sql'])
            patterns[normalized] = patterns.get(normalized, 0) + 1
        # Return patterns that appear more than once
        return {k: v for k, v in patterns.items() if v > 1}
select_related performs SQL JOINs to fetch related objects in a single query:
# Without select_related each author access lazily fires its own SELECT:
# 1 query for the articles plus N queries for the authors.
plain = Article.objects.all()
for article in plain:
    print(article.author.name)  # one extra query per iteration

# With select_related the author rows come back in the same JOINed query.
joined = Article.objects.select_related('author').all()
for article in joined:
    print(article.author.name)  # served from the row already fetched

# select_related accepts several relations at once, including multi-hop
# paths that follow foreign keys through intermediate models.
articles = Article.objects.select_related(
    'author',
    'category',
    'author__profile',
).all()

# Roughly the SQL Django generates:
# SELECT article.*, author.*, category.*, profile.*
# FROM article
# JOIN author ON article.author_id = author.id
# JOIN category ON article.category_id = category.id
# JOIN profile ON author.profile_id = profile.id
def get_articles_with_related(include_author=True, include_category=True):
    """Build an Article queryset, JOINing only the relations the caller wants."""
    wanted = [
        relation
        for relation, include in (
            ('author', include_author),
            ('category', include_category),
        )
        if include
    ]
    queryset = Article.objects.all()
    # Only attach select_related when there is something to join.
    return queryset.select_related(*wanted) if wanted else queryset
import time

from django.db import connection  # BUG FIX: was missing; both tests read connection.queries
from django.test import TestCase


class SelectRelatedPerformanceTest(TestCase):
    """Compare wall-clock time and query counts with and without select_related.

    NOTE: connection.queries is only populated when DEBUG=True; run the
    suite with debug enabled for the printed query counts to be meaningful.
    """

    def setUp(self):
        # One author per article so the naive test really performs 1 + N queries.
        for i in range(100):
            author = Author.objects.create(name=f"Author {i}")
            Article.objects.create(
                title=f"Article {i}",
                author=author,
                content="Test content"
            )

    def test_without_select_related(self):
        start_time = time.time()
        articles = Article.objects.all()
        # The comprehension forces evaluation plus one author query per row.
        author_names = [article.author.name for article in articles]
        end_time = time.time()
        print(f"Without select_related: {end_time - start_time:.4f}s")
        print(f"Queries executed: {len(connection.queries)}")

    def test_with_select_related(self):
        start_time = time.time()
        articles = Article.objects.select_related('author').all()
        # Authors arrive in the same JOINed query; no per-row SELECTs.
        author_names = [article.author.name for article in articles]
        end_time = time.time()
        print(f"With select_related: {end_time - start_time:.4f}s")
        print(f"Queries executed: {len(connection.queries)}")
prefetch_related handles many-to-many and reverse foreign key relationships by running one additional query per relationship and joining the results to the parent objects in Python:
# Without prefetch_related, reading tags lazily costs one query per article.
plain = Article.objects.all()
for article in plain:
    tags = list(article.tags.all())  # fresh query every iteration

# With prefetch_related, all tag rows arrive in a single second query.
prefetched = Article.objects.prefetch_related('tags').all()
for article in prefetched:
    tags = list(article.tags.all())  # served from the prefetch cache

from django.db.models import Prefetch

# A Prefetch object lets you filter the prefetched rows and stash them on
# a custom attribute instead of the default related-manager cache.
articles = Article.objects.prefetch_related(
    Prefetch(
        'comments',
        queryset=Comment.objects.filter(is_approved=True).select_related('author'),
        to_attr='approved_comments'
    )
).all()

# With to_attr, the filtered rows are plain lists on each instance.
for article in articles:
    for comment in article.approved_comments:
        print(f"Comment by {comment.author.name}: {comment.content}")

# '__' paths prefetch relations of relations.
articles = Article.objects.prefetch_related(
    'tags',
    'comments__author',
    'comments__replies'
).all()

# The same works from the other side of the relation, several levels deep.
users = User.objects.prefetch_related(
    'articles__tags',
    'articles__comments__author'
).all()

from django.db.models import Count, Avg

# The inner queryset may annotate and join: each prefetched comment carries
# its reply count and its author without any extra queries.
articles = Article.objects.prefetch_related(
    Prefetch(
        'comments',
        queryset=Comment.objects.annotate(
            reply_count=Count('replies')
        ).select_related('author')
    )
).all()
Limit the fields retrieved from the database:
# only() restricts the SELECT list to exactly the named columns...
articles = Article.objects.only('title', 'slug', 'created_at').all()

# ...while defer() fetches everything except the named (expensive) columns.
articles = Article.objects.defer('content', 'metadata').all()

# Both compose with select_related; '__' paths limit the joined table too.
articles = Article.objects.select_related('author').only(
    'title', 'slug', 'author__name'
).all()
Perform calculations in the database instead of Python:
from django.db.models import Count, Sum, Avg, F, Case, When

# Aggregations: aggregate() collapses the whole queryset into one dict of
# totals, computed by the database rather than in Python.
stats = Article.objects.aggregate(
    total_articles=Count('id'),
    avg_word_count=Avg('word_count'),
    total_views=Sum('view_count')
)

# Annotations: annotate() attaches a computed column to every returned row.
articles = Article.objects.annotate(
    comment_count=Count('comments'),
    avg_rating=Avg('ratings__score'),
    # Case/When evaluates per row in SQL; plain Python values like True are
    # auto-wrapped in Value() expressions.
    is_popular=Case(
        When(view_count__gt=1000, then=True),
        default=False
    )
).all()

# F expressions compare columns of the same row entirely in SQL,
# avoiding a round trip per row.
Article.objects.filter(view_count__gt=F('comment_count') * 10)
Use bulk operations for better performance:
from datetime import timedelta  # BUG FIX: was missing

from django.utils import timezone  # BUG FIX: was missing

# Bulk create: one INSERT per batch instead of 1000 single-row INSERTs.
articles = [
    Article(title=f"Article {i}", content=f"Content {i}")
    for i in range(1000)
]
Article.objects.bulk_create(articles, batch_size=100)

# Bulk update: call it on the plain manager. (The original filtered the
# queryset first, which silently skips any passed object the filter
# excludes -- bulk_update only touches rows matching both the filter and
# the given primary keys.)
Article.objects.bulk_update([
    Article(id=1, view_count=100),
    Article(id=2, view_count=200),
], ['view_count'], batch_size=100)

# Bulk delete: a single DELETE ... WHERE statement.
Article.objects.filter(created_at__lt=timezone.now() - timedelta(days=365)).delete()
Sometimes raw SQL is more efficient:
# Complex query that's hard to express in ORM.
# raw() maps each row to an Article instance; the extra SELECTed columns
# (comment_count, avg_rating) become plain attributes on each object.
articles = Article.objects.raw("""
SELECT a.*,
COUNT(c.id) as comment_count,
AVG(r.score) as avg_rating
FROM articles_article a
LEFT JOIN articles_comment c ON a.id = c.article_id
LEFT JOIN articles_rating r ON a.id = r.article_id
WHERE a.is_published = true
GROUP BY a.id
HAVING COUNT(c.id) > 5
ORDER BY avg_rating DESC, comment_count DESC
LIMIT 10
""")
# Using extra() for database functions.
# NOTE(review): extra() is a long-deprecated escape hatch -- prefer
# annotate() with Subquery/OuterRef for this correlated count where possible.
articles = Article.objects.extra(
    select={
        'comment_count': 'SELECT COUNT(*) FROM comments WHERE article_id = articles_article.id'
    }
).all()
# Django management command to analyze slow queries
from django.core.management.base import BaseCommand
from django.db import connection


class Command(BaseCommand):
    help = 'Analyze slow queries and suggest indexes'

    def handle(self, *args, **options):
        """Print the ten slowest statements recorded by pg_stat_statements.

        NOTE(review): requires the pg_stat_statements extension to be
        installed; on PostgreSQL 13+ the column is mean_exec_time rather
        than mean_time -- verify against the server version.
        """
        with connection.cursor() as cursor:
            # PostgreSQL: Find slow queries
            cursor.execute("""
SELECT query, mean_time, calls, total_time
FROM pg_stat_statements
WHERE mean_time > 100
ORDER BY mean_time DESC
LIMIT 10
""")
            for row in cursor.fetchall():
                query, mean_time, calls, total_time = row
                self.stdout.write(f"Query: {query[:100]}...")
                self.stdout.write(f"Mean time: {mean_time}ms")
                self.stdout.write(f"Calls: {calls}")
                self.stdout.write("---")
# models.py
from django.db import models


class Article(models.Model):
    """Blog article with indexes tuned for the common list/detail queries."""

    title = models.CharField(max_length=200)
    slug = models.SlugField(unique=True)
    # BUG FIX: the full-text index below (and callers elsewhere that do
    # Article.objects.create(..., content=...)) reference a content field
    # the original model never declared.
    content = models.TextField(blank=True, default='')
    author = models.ForeignKey(User, on_delete=models.CASCADE)
    category = models.ForeignKey(Category, on_delete=models.CASCADE)
    is_published = models.BooleanField(default=False)
    created_at = models.DateTimeField(auto_now_add=True)
    view_count = models.PositiveIntegerField(default=0)

    class Meta:
        # BUG FIX: the original assigned Meta.indexes twice, so the first
        # (single-column) list was silently discarded. Merged into one list.
        indexes = [
            # Single column indexes
            models.Index(fields=['is_published']),
            models.Index(fields=['created_at']),
            models.Index(fields=['view_count']),
            # Composite indexes for common query patterns:
            # filtering by published status while ordering by date
            models.Index(fields=['is_published', '-created_at']),
            # filtering by category and published status
            models.Index(fields=['category', 'is_published']),
            # For full-text search (PostgreSQL)
            models.Index(fields=['title', 'content']),
        ]
# Migration for adding indexes
from django.db import migrations, models


class Migration(migrations.Migration):
    # BUG FIX: CREATE INDEX CONCURRENTLY cannot run inside a transaction
    # block, and Django wraps every migration in one unless atomic is
    # disabled -- without this flag both RunSQL operations fail.
    atomic = False

    dependencies = [
        ('articles', '0001_initial'),
    ]

    operations = [
        migrations.RunSQL(
            # PostgreSQL: partial index covering only published rows.
            "CREATE INDEX CONCURRENTLY idx_published_articles "
            "ON articles_article (created_at DESC) "
            "WHERE is_published = true;",
            reverse_sql="DROP INDEX idx_published_articles;"
        ),
        migrations.RunSQL(
            # PostgreSQL: GIN index for full-text search over title + content.
            "CREATE INDEX CONCURRENTLY idx_article_search "
            "ON articles_article USING gin(to_tsvector('english', title || ' ' || content));",
            reverse_sql="DROP INDEX idx_article_search;"
        ),
    ]
# Analyze index usage (PostgreSQL)
def analyze_index_usage():
    """Dump scan/read statistics for every index in the public schema."""
    with connection.cursor() as cursor:
        cursor.execute("""
SELECT
schemaname,
tablename,
indexname,
idx_tup_read,
idx_tup_fetch,
idx_scan
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY idx_scan DESC
""")
        rows = cursor.fetchall()
    print("Index Usage Statistics:")
    # An idx_scan near zero marks an index that costs writes but serves no reads.
    for schema, table, index, tup_read, tup_fetch, scans in rows:
        print(f"{table}.{index}: {scans} scans, {tup_read} tuples read")
import time
import logging
from django.db import connection
from contextlib import contextmanager

logger = logging.getLogger(__name__)


@contextmanager
def query_profiler(description="Query block"):
    """Context manager to profile database queries.

    Logs the query count and wall-clock time of the wrapped block, plus a
    warning for each individually slow statement. connection.queries is
    only populated when DEBUG=True, so this is a development tool.
    """
    initial_queries = len(connection.queries)
    start_time = time.time()
    try:
        yield
    finally:
        # BUG FIX: the original logged nothing when the block raised;
        # report the stats on both the success and the exception path.
        end_time = time.time()
        num_queries = len(connection.queries) - initial_queries
        duration = end_time - start_time
        logger.info(
            f"{description}: {num_queries} queries in {duration:.4f}s"
        )
        # Log slow queries
        if duration > 0.5:
            recent_queries = connection.queries[initial_queries:]
            for query in recent_queries:
                if float(query['time']) > 0.1:
                    logger.warning(f"Slow query ({query['time']}s): {query['sql'][:200]}...")
# Usage
def expensive_view(request):
    """Render the article list while logging its query profile."""
    with query_profiler("Article list with comments"):
        # NOTE(review): slicing alone does not evaluate the QuerySet, so the
        # profiled block may record zero queries -- confirm where evaluation
        # is intended to happen.
        articles = (
            Article.objects
            .select_related('author')
            .prefetch_related('comments__author')
            .all()[:20]
        )
    return render(request, 'articles.html', {'articles': articles})
from functools import wraps


def optimize_queries(max_queries=10):
    """Decorator to ensure views don't exceed query limits"""
    def decorator(view_func):
        @wraps(view_func)
        def wrapper(request, *args, **kwargs):
            before = len(connection.queries)
            response = view_func(request, *args, **kwargs)
            executed = len(connection.queries) - before
            # Warn (rather than fail) when the view blows past its budget.
            if executed > max_queries:
                logger.warning(
                    f"View {view_func.__name__} executed {executed} queries "
                    f"(limit: {max_queries})"
                )
            return response
        return wrapper
    return decorator
# Usage
@optimize_queries(max_queries=5)
def article_list(request):
    """Article list page, budgeted to at most five queries."""
    page = Article.objects.select_related('author').all()[:20]
    return render(request, 'articles.html', {'articles': page})
from django.core.cache import cache
from django.core.cache.utils import make_template_fragment_key


def get_popular_articles(limit=10):
    """Get popular articles with caching"""
    cache_key = f'popular_articles_{limit}'
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    # Cache miss: evaluate to a concrete list so the cache stores rows,
    # not a lazy queryset.
    result = list(
        Article.objects.select_related('author', 'category')
        .filter(is_published=True)
        .order_by('-view_count')[:limit]
    )
    cache.set(cache_key, result, 3600)  # keep for one hour
    return result
# Cache invalidation
def invalidate_article_cache(article):
    """Invalidate related caches when article changes"""
    # Drop every cached view this article can appear in.
    stale = [
        'popular_articles_10',
        'popular_articles_20',
        f'article_{article.id}',
        f'category_articles_{article.category_id}',
    ]
    cache.delete_many(stale)
# settings.py
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        # BUG FIX: the original nested a second 'OPTIONS' dict inside
        # OPTIONS and passed MAX_CONNS/MIN_CONNS, which the postgresql
        # backend does not understand (psycopg would reject them as
        # connection kwargs). Connection pooling needs an external pooler
        # (e.g. PgBouncer) or a dedicated pooling backend package.
        'OPTIONS': {
            'options': '-c default_transaction_isolation=read_committed',
        },
        'CONN_MAX_AGE': 600,  # reuse each connection for up to 10 minutes
        'CONN_HEALTH_CHECKS': True,  # ping before reuse (Django 4.1+)
    }
}
# Custom database router for read/write splitting
class DatabaseRouter:
    """Send SELECT traffic to the 'read' alias and all writes to 'write'."""

    def db_for_read(self, model, **hints):
        # Every model reads from the replica alias.
        return 'read'

    def db_for_write(self, model, **hints):
        # Every model writes to the primary alias.
        return 'write'
import re  # BUG FIX: was missing, but test_query_analysis uses re.sub
import time

from django.db import connection
from django.test import TestCase, override_settings  # BUG FIX: was imported twice


class QueryOptimizationTest(TestCase):
    """Regression tests that pin query counts and fail on N+1 patterns."""

    def setUp(self):
        # NOTE(review): create_test_data() is not defined anywhere in this
        # snippet -- supply it (or a fixture) creating articles with authors
        # and tags before these tests can run.
        self.create_test_data()

    def test_article_list_queries(self):
        """Test that article list doesn't have N+1 queries"""
        # NOTE(review): select_related joins authors into the article query,
        # so this may well be 2 queries (articles+authors, tags) -- confirm
        # the expected count of 3.
        with self.assertNumQueries(3):  # articles, authors, tags
            articles = Article.objects.select_related('author').prefetch_related('tags').all()[:10]
            # Force evaluation
            for article in articles:
                _ = article.author.name
                _ = list(article.tags.all())

    def test_query_performance(self):
        """Test query performance benchmarks"""
        start_time = time.time()
        # Your optimized query
        articles = Article.objects.select_related('author').prefetch_related('tags').all()[:100]
        list(articles)  # Force evaluation
        duration = time.time() - start_time
        # Assert performance requirement
        self.assertLess(duration, 0.5, "Query took too long")

    @override_settings(DEBUG=True)
    def test_query_analysis(self):
        """Analyze queries for optimization opportunities"""
        connection.queries_log.clear()
        # Execute your view logic
        articles = Article.objects.all()[:10]
        for article in articles:
            _ = article.author.name

        # Group structurally identical queries by masking literal numbers.
        similar_queries = {}
        for query in connection.queries:
            pattern = re.sub(r'\d+', 'N', query['sql'])
            similar_queries[pattern] = similar_queries.get(pattern, 0) + 1

        # Check for N+1 patterns
        for pattern, count in similar_queries.items():
            if count > 5:
                self.fail(f"Potential N+1 query detected: {pattern} (count: {count})")
This comprehensive query optimization guide provides the foundation for building high-performance Django applications by eliminating database bottlenecks and implementing efficient data access patterns.
Performance and Optimization
Performance optimization is critical for building scalable Django applications that provide excellent user experiences. This comprehensive guide covers database query optimization, template rendering, caching strategies, profiling techniques, and advanced optimization patterns that transform slow applications into high-performance systems.
Template Rendering Optimization
Template rendering can be a significant performance bottleneck in Django applications. This chapter covers comprehensive template optimization techniques, from reducing template complexity to implementing advanced caching strategies that dramatically improve rendering performance.