Django's aggregation framework provides powerful tools for performing calculations across multiple database records. Understanding aggregation functions, grouping, and annotation enables you to generate reports, statistics, and analytical data efficiently at the database level.
from django.db.models import Count, Sum, Avg, Max, Min, StdDev, Variance
from myapp.models import Post, Author, Category, Comment
# Count all posts
total_posts = Post.objects.count()
# Count published posts
published_count = Post.objects.filter(status='published').count()
# Aggregate functions return dictionaries
stats = Post.objects.aggregate(
total_posts=Count('id'),
avg_views=Avg('view_count'),
max_views=Max('view_count'),
min_views=Min('view_count'),
total_views=Sum('view_count')
)
# Returns: {'total_posts': 150, 'avg_views': 1250.5, 'max_views': 5000, ...}
# Single aggregation
avg_views = Post.objects.aggregate(Avg('view_count'))['view_count__avg']
# Aggregation with filtering
popular_post_stats = Post.objects.filter(
view_count__gte=1000
).aggregate(
count=Count('id'),
avg_views=Avg('view_count'),
total_views=Sum('view_count')
)
# Date-based aggregations
from django.utils import timezone
from datetime import timedelta
recent_stats = Post.objects.filter(
created_at__gte=timezone.now() - timedelta(days=30)
).aggregate(
recent_posts=Count('id'),
avg_daily_views=Avg('view_count'),
total_recent_views=Sum('view_count')
)
# Aggregate across foreign key relationships
author_stats = Author.objects.aggregate(
total_posts=Count('posts'),
avg_posts_per_author=Avg('posts'),
max_author_posts=Max('posts__view_count')
)
# Aggregate with relationship filtering
tech_category_stats = Post.objects.filter(
category__name='Technology'
).aggregate(
tech_posts=Count('id'),
avg_tech_views=Avg('view_count'),
total_tech_comments=Count('comments')
)
# Many-to-many aggregations
tag_stats = Post.objects.aggregate(
posts_with_tags=Count('tags', distinct=True),
avg_tags_per_post=Avg('tags')
)
# Complex relationship aggregations
comment_stats = Comment.objects.select_related('post').aggregate(
total_comments=Count('id'),
avg_comments_per_post=Avg('post__comments'),
posts_with_comments=Count('post', distinct=True)
)
from django.db.models import F, Count, Sum, Avg
# Annotate each post with comment count
posts_with_comment_count = Post.objects.annotate(
comment_count=Count('comments')
)
for post in posts_with_comment_count:
print(f"{post.title}: {post.comment_count} comments")
# Multiple annotations
posts_with_stats = Post.objects.annotate(
comment_count=Count('comments'),
like_count=Count('likes'),
tag_count=Count('tags'),
engagement_score=F('view_count') + Count('comments') * 5
).order_by('-engagement_score')
# Annotations with filtering
popular_posts = Post.objects.annotate(
comment_count=Count('comments')
).filter(
comment_count__gte=10,
view_count__gte=1000
)
# Conditional annotations
from django.db.models import Case, When, IntegerField
posts_with_popularity = Post.objects.annotate(
popularity_level=Case(
When(view_count__gte=10000, then=3),
When(view_count__gte=1000, then=2),
When(view_count__gte=100, then=1),
default=0,
output_field=IntegerField()
)
)
# Group by author and count posts
author_post_counts = Post.objects.values('author__username').annotate(
post_count=Count('id'),
avg_views=Avg('view_count'),
total_views=Sum('view_count')
).order_by('-post_count')
# Group by category
category_stats = Post.objects.values('category__name').annotate(
post_count=Count('id'),
avg_views=Avg('view_count'),
latest_post=Max('created_at')
).order_by('-post_count')
# Group by date
from django.db.models.functions import TruncDate, TruncMonth, TruncYear
# Posts per day
daily_post_counts = Post.objects.annotate(
date=TruncDate('created_at')
).values('date').annotate(
count=Count('id')
).order_by('date')
# Posts per month
monthly_stats = Post.objects.annotate(
month=TruncMonth('created_at')
).values('month').annotate(
post_count=Count('id'),
avg_views=Avg('view_count'),
total_views=Sum('view_count')
).order_by('month')
# Group by multiple fields
author_category_stats = Post.objects.values(
'author__username', 'category__name'
).annotate(
post_count=Count('id'),
avg_views=Avg('view_count')
).order_by('author__username', '-post_count')
from django.db.models import Q, F, Case, When, Value
from django.db.models.functions import Coalesce, Greatest, Least
# Conditional counting
posts_with_conditional_counts = Post.objects.annotate(
approved_comments=Count('comments', filter=Q(comments__is_approved=True)),
pending_comments=Count('comments', filter=Q(comments__is_approved=False)),
recent_comments=Count(
'comments',
filter=Q(comments__created_at__gte=timezone.now() - timedelta(days=7))
)
)
# Complex calculations
posts_with_metrics = Post.objects.annotate(
# Engagement rate (comments per view)
engagement_rate=Case(
When(view_count=0, then=Value(0.0)),
default=Count('comments') * 100.0 / F('view_count'),
output_field=models.FloatField()
),
# Quality score (combination of metrics)
quality_score=(
F('view_count') * 0.1 +
Count('comments') * 2 +
Count('likes') * 1.5
),
# Days since publication
days_since_published=Case(
When(published_at__isnull=True, then=Value(None)),
default=(timezone.now().date() - F('published_at__date')),
output_field=models.IntegerField()
)
)
# Subquery annotations
from django.db.models import OuterRef, Subquery
# Annotate with latest comment date
latest_comment_subquery = Comment.objects.filter(
post=OuterRef('pk'),
is_approved=True
).order_by('-created_at').values('created_at')[:1]
posts_with_latest_comment = Post.objects.annotate(
latest_comment_date=Subquery(latest_comment_subquery)
)
# Annotate with related model statistics
author_avg_views_subquery = Post.objects.filter(
author=OuterRef('author')
).aggregate(avg_views=Avg('view_count'))['avg_views']
posts_with_author_avg = Post.objects.annotate(
author_avg_views=Subquery(
Post.objects.filter(
author=OuterRef('author')
).aggregate(avg_views=Avg('view_count')).values('avg_views')
)
)
from django.db.models.functions import (
Extract, TruncDate, TruncWeek, TruncMonth, TruncQuarter, TruncYear
)
# Group by year
yearly_stats = Post.objects.annotate(
year=TruncYear('created_at')
).values('year').annotate(
post_count=Count('id'),
avg_views=Avg('view_count'),
total_views=Sum('view_count')
).order_by('year')
# Group by quarter
quarterly_stats = Post.objects.annotate(
quarter=TruncQuarter('created_at')
).values('quarter').annotate(
post_count=Count('id'),
avg_views=Avg('view_count')
).order_by('quarter')
# Group by week
weekly_stats = Post.objects.annotate(
week=TruncWeek('created_at')
).values('week').annotate(
post_count=Count('id')
).order_by('week')
# Extract specific date parts
posts_by_weekday = Post.objects.annotate(
weekday=Extract('created_at', 'week_day')
).values('weekday').annotate(
count=Count('id')
).order_by('weekday')
posts_by_hour = Post.objects.annotate(
hour=Extract('created_at', 'hour')
).values('hour').annotate(
count=Count('id')
).order_by('hour')
# Time-based filtering with aggregation
def get_monthly_stats(year=None):
"""Get monthly post statistics for a specific year"""
queryset = Post.objects.all()
if year:
queryset = queryset.filter(created_at__year=year)
return queryset.annotate(
month=TruncMonth('created_at')
).values('month').annotate(
post_count=Count('id'),
avg_views=Avg('view_count'),
total_views=Sum('view_count'),
unique_authors=Count('author', distinct=True)
).order_by('month')
# Usage
stats_2023 = get_monthly_stats(2023)
all_time_monthly = get_monthly_stats()
from django.db.models import Window
from django.db.models.functions import RowNumber, Rank, DenseRank
# Rolling averages (requires window functions - PostgreSQL)
def get_posts_with_rolling_avg():
"""Get posts with 30-day rolling average views"""
return Post.objects.annotate(
rolling_avg_views=Window(
expression=Avg('view_count'),
partition_by=[TruncMonth('created_at')],
order_by='created_at'
)
).order_by('created_at')
# Ranking posts by views within categories
posts_with_rank = Post.objects.annotate(
view_rank=Window(
expression=Rank(),
partition_by=['category'],
order_by=F('view_count').desc()
)
).order_by('category', 'view_rank')
# Running totals
posts_with_running_total = Post.objects.annotate(
running_total_views=Window(
expression=Sum('view_count'),
order_by='created_at'
)
).order_by('created_at')
# Author statistics with post and comment aggregations
def get_author_comprehensive_stats():
"""Get comprehensive author statistics"""
return Author.objects.annotate(
# Post statistics
total_posts=Count('posts'),
published_posts=Count('posts', filter=Q(posts__status='published')),
draft_posts=Count('posts', filter=Q(posts__status='draft')),
# View statistics
total_views=Sum('posts__view_count'),
avg_views_per_post=Avg('posts__view_count'),
max_post_views=Max('posts__view_count'),
# Comment statistics
total_comments_received=Count('posts__comments'),
avg_comments_per_post=Avg('posts__comments'),
# Date statistics
first_post_date=Min('posts__created_at'),
latest_post_date=Max('posts__created_at'),
# Engagement metrics
engagement_score=Sum('posts__view_count') + Count('posts__comments') * 10
).filter(total_posts__gt=0).order_by('-engagement_score')
# Category performance analysis
def get_category_performance():
"""Analyze category performance metrics"""
return Category.objects.annotate(
# Basic counts
total_posts=Count('posts'),
published_posts=Count('posts', filter=Q(posts__status='published')),
# Performance metrics
avg_views_per_post=Avg('posts__view_count'),
total_category_views=Sum('posts__view_count'),
# Author diversity
unique_authors=Count('posts__author', distinct=True),
# Time metrics
days_since_last_post=Case(
When(posts__isnull=True, then=Value(None)),
default=timezone.now().date() - Max('posts__created_at__date'),
output_field=models.IntegerField()
),
# Engagement
comments_per_post=Avg('posts__comments'),
engagement_rate=Case(
When(total_category_views=0, then=Value(0.0)),
default=Count('posts__comments') * 100.0 / Sum('posts__view_count'),
output_field=models.FloatField()
)
).filter(total_posts__gt=0).order_by('-total_category_views')
def get_dashboard_analytics(days=30):
"""Get comprehensive dashboard analytics"""
cutoff_date = timezone.now() - timedelta(days=days)
# Overall statistics
overall_stats = Post.objects.filter(
created_at__gte=cutoff_date
).aggregate(
total_posts=Count('id'),
total_views=Sum('view_count'),
total_comments=Count('comments'),
avg_views_per_post=Avg('view_count'),
unique_authors=Count('author', distinct=True)
)
# Top performing posts
top_posts = Post.objects.filter(
created_at__gte=cutoff_date
).annotate(
comment_count=Count('comments'),
engagement_score=F('view_count') + Count('comments') * 5
).order_by('-engagement_score')[:10]
# Author leaderboard
top_authors = Author.objects.annotate(
recent_posts=Count(
'posts',
filter=Q(posts__created_at__gte=cutoff_date)
),
recent_views=Sum(
'posts__view_count',
filter=Q(posts__created_at__gte=cutoff_date)
)
).filter(recent_posts__gt=0).order_by('-recent_views')[:10]
# Category performance
category_performance = Category.objects.annotate(
recent_posts=Count(
'posts',
filter=Q(posts__created_at__gte=cutoff_date)
),
recent_views=Sum(
'posts__view_count',
filter=Q(posts__created_at__gte=cutoff_date)
)
).filter(recent_posts__gt=0).order_by('-recent_views')
# Daily trends
daily_trends = Post.objects.filter(
created_at__gte=cutoff_date
).annotate(
date=TruncDate('created_at')
).values('date').annotate(
posts_count=Count('id'),
total_views=Sum('view_count'),
total_comments=Count('comments')
).order_by('date')
return {
'overall_stats': overall_stats,
'top_posts': top_posts,
'top_authors': top_authors,
'category_performance': category_performance,
'daily_trends': list(daily_trends)
}
# Content performance analysis
def analyze_content_performance():
"""Analyze content performance patterns"""
# Performance by post length
length_performance = Post.objects.annotate(
content_length=Length('content'),
length_category=Case(
When(content_length__lt=500, then=Value('Short')),
When(content_length__lt=1500, then=Value('Medium')),
When(content_length__lt=3000, then=Value('Long')),
default=Value('Very Long'),
output_field=models.CharField()
)
).values('length_category').annotate(
avg_views=Avg('view_count'),
avg_comments=Avg('comments'),
post_count=Count('id')
).order_by('length_category')
# Performance by publication time
time_performance = Post.objects.annotate(
hour=Extract('published_at', 'hour'),
weekday=Extract('published_at', 'week_day')
).values('hour', 'weekday').annotate(
avg_views=Avg('view_count'),
post_count=Count('id')
).order_by('weekday', 'hour')
# Tag effectiveness
tag_performance = Tag.objects.annotate(
post_count=Count('posts'),
avg_views=Avg('posts__view_count'),
total_views=Sum('posts__view_count')
).filter(post_count__gte=5).order_by('-avg_views')
return {
'length_performance': list(length_performance),
'time_performance': list(time_performance),
'tag_performance': tag_performance
}
# Efficient aggregation patterns
class OptimizedAnalytics:
"""Optimized analytics queries"""
@staticmethod
def get_cached_stats(cache_key='blog_stats', timeout=3600):
"""Cache expensive aggregation queries"""
from django.core.cache import cache
stats = cache.get(cache_key)
if stats is None:
stats = Post.objects.aggregate(
total_posts=Count('id'),
total_views=Sum('view_count'),
avg_views=Avg('view_count'),
total_authors=Count('author', distinct=True)
)
cache.set(cache_key, stats, timeout)
return stats
@staticmethod
def get_paginated_author_stats(page=1, per_page=20):
"""Paginated author statistics"""
offset = (page - 1) * per_page
return Author.objects.annotate(
post_count=Count('posts'),
total_views=Sum('posts__view_count')
).filter(
post_count__gt=0
).order_by('-total_views')[offset:offset + per_page]
@staticmethod
def get_lightweight_trends(days=30):
"""Lightweight trend analysis"""
cutoff_date = timezone.now() - timedelta(days=days)
return Post.objects.filter(
created_at__gte=cutoff_date
).values('created_at__date').annotate(
count=Count('id')
).order_by('created_at__date')
# Usage
stats = OptimizedAnalytics.get_cached_stats()
author_page_1 = OptimizedAnalytics.get_paginated_author_stats(page=1)
trends = OptimizedAnalytics.get_lightweight_trends(days=7)
Django's aggregation framework enables you to perform complex analytical queries efficiently at the database level, providing the foundation for building comprehensive reporting and analytics features in your applications.
Managers and QuerySets
Django's Manager and QuerySet system provides a powerful abstraction for database operations. Understanding how to create custom managers and querysets enables you to build reusable, chainable query logic that keeps your code DRY and maintainable.
Search
Django provides multiple approaches for implementing search functionality, from simple text-based searches to full-text search capabilities. Understanding these options enables you to choose the right search solution for your application's needs.