Models and Databases

Aggregation

Django's aggregation framework provides powerful tools for performing calculations across multiple database records. Understanding aggregation functions, grouping, and annotation enables you to generate reports, statistics, and analytical data efficiently at the database level.

Aggregation

Django's aggregation framework provides powerful tools for performing calculations across multiple database records. Understanding aggregation functions, grouping, and annotation enables you to generate reports, statistics, and analytical data efficiently at the database level.

Basic Aggregation Functions

Simple Aggregations

from django.db.models import Count, Sum, Avg, Max, Min, StdDev, Variance
from myapp.models import Post, Author, Category, Comment

# Count all posts
total_posts = Post.objects.count()

# Count published posts
published_count = Post.objects.filter(status='published').count()

# Aggregate functions return dictionaries
stats = Post.objects.aggregate(
    total_posts=Count('id'),
    avg_views=Avg('view_count'),
    max_views=Max('view_count'),
    min_views=Min('view_count'),
    total_views=Sum('view_count')
)
# Returns: {'total_posts': 150, 'avg_views': 1250.5, 'max_views': 5000, ...}

# Single aggregation
avg_views = Post.objects.aggregate(Avg('view_count'))['view_count__avg']

# Aggregation with filtering
popular_post_stats = Post.objects.filter(
    view_count__gte=1000
).aggregate(
    count=Count('id'),
    avg_views=Avg('view_count'),
    total_views=Sum('view_count')
)

# Date-based aggregations
from django.utils import timezone
from datetime import timedelta

recent_stats = Post.objects.filter(
    created_at__gte=timezone.now() - timedelta(days=30)
).aggregate(
    recent_posts=Count('id'),
    avg_daily_views=Avg('view_count'),
    total_recent_views=Sum('view_count')
)

Aggregation Across Relationships

# Aggregate across foreign key relationships
author_stats = Author.objects.aggregate(
    total_posts=Count('posts'),
    avg_posts_per_author=Avg('posts'),
    max_author_posts=Max('posts__view_count')
)

# Aggregate with relationship filtering
tech_category_stats = Post.objects.filter(
    category__name='Technology'
).aggregate(
    tech_posts=Count('id'),
    avg_tech_views=Avg('view_count'),
    total_tech_comments=Count('comments')
)

# Many-to-many aggregations
tag_stats = Post.objects.aggregate(
    posts_with_tags=Count('tags', distinct=True),
    avg_tags_per_post=Avg('tags')
)

# Complex relationship aggregations
comment_stats = Comment.objects.select_related('post').aggregate(
    total_comments=Count('id'),
    avg_comments_per_post=Avg('post__comments'),
    posts_with_comments=Count('post', distinct=True)
)

Annotations and Grouping

Basic Annotations

from django.db.models import F, Count, Sum, Avg

# Annotate each post with comment count
posts_with_comment_count = Post.objects.annotate(
    comment_count=Count('comments')
)

for post in posts_with_comment_count:
    print(f"{post.title}: {post.comment_count} comments")

# Multiple annotations
posts_with_stats = Post.objects.annotate(
    comment_count=Count('comments'),
    like_count=Count('likes'),
    tag_count=Count('tags'),
    engagement_score=F('view_count') + Count('comments') * 5
).order_by('-engagement_score')

# Annotations with filtering
popular_posts = Post.objects.annotate(
    comment_count=Count('comments')
).filter(
    comment_count__gte=10,
    view_count__gte=1000
)

# Conditional annotations
from django.db.models import Case, When, IntegerField

posts_with_popularity = Post.objects.annotate(
    popularity_level=Case(
        When(view_count__gte=10000, then=3),
        When(view_count__gte=1000, then=2),
        When(view_count__gte=100, then=1),
        default=0,
        output_field=IntegerField()
    )
)

Grouping and Aggregation

# Group by author and count posts
author_post_counts = Post.objects.values('author__username').annotate(
    post_count=Count('id'),
    avg_views=Avg('view_count'),
    total_views=Sum('view_count')
).order_by('-post_count')

# Group by category
category_stats = Post.objects.values('category__name').annotate(
    post_count=Count('id'),
    avg_views=Avg('view_count'),
    latest_post=Max('created_at')
).order_by('-post_count')

# Group by date
from django.db.models.functions import TruncDate, TruncMonth, TruncYear

# Posts per day
daily_post_counts = Post.objects.annotate(
    date=TruncDate('created_at')
).values('date').annotate(
    count=Count('id')
).order_by('date')

# Posts per month
monthly_stats = Post.objects.annotate(
    month=TruncMonth('created_at')
).values('month').annotate(
    post_count=Count('id'),
    avg_views=Avg('view_count'),
    total_views=Sum('view_count')
).order_by('month')

# Group by multiple fields
author_category_stats = Post.objects.values(
    'author__username', 'category__name'
).annotate(
    post_count=Count('id'),
    avg_views=Avg('view_count')
).order_by('author__username', '-post_count')

Advanced Annotation Patterns

from django.db.models import Q, F, Case, When, Value
from django.db.models.functions import Coalesce, Greatest, Least

# Conditional counting
posts_with_conditional_counts = Post.objects.annotate(
    approved_comments=Count('comments', filter=Q(comments__is_approved=True)),
    pending_comments=Count('comments', filter=Q(comments__is_approved=False)),
    recent_comments=Count(
        'comments',
        filter=Q(comments__created_at__gte=timezone.now() - timedelta(days=7))
    )
)

# Complex calculations
posts_with_metrics = Post.objects.annotate(
    # Engagement rate (comments per view)
    engagement_rate=Case(
        When(view_count=0, then=Value(0.0)),
        default=Count('comments') * 100.0 / F('view_count'),
        output_field=models.FloatField()
    ),
    
    # Quality score (combination of metrics)
    quality_score=(
        F('view_count') * 0.1 +
        Count('comments') * 2 +
        Count('likes') * 1.5
    ),
    
    # Days since publication
    days_since_published=Case(
        When(published_at__isnull=True, then=Value(None)),
        default=(timezone.now().date() - F('published_at__date')),
        output_field=models.IntegerField()
    )
)

# Subquery annotations
from django.db.models import OuterRef, Subquery

# Annotate with latest comment date
latest_comment_subquery = Comment.objects.filter(
    post=OuterRef('pk'),
    is_approved=True
).order_by('-created_at').values('created_at')[:1]

posts_with_latest_comment = Post.objects.annotate(
    latest_comment_date=Subquery(latest_comment_subquery)
)

# Annotate with related model statistics
author_avg_views_subquery = Post.objects.filter(
    author=OuterRef('author')
).aggregate(avg_views=Avg('view_count'))['avg_views']

posts_with_author_avg = Post.objects.annotate(
    author_avg_views=Subquery(
        Post.objects.filter(
            author=OuterRef('author')
        ).aggregate(avg_views=Avg('view_count')).values('avg_views')
    )
)

Time-Based Aggregations

Date and Time Grouping

from django.db.models.functions import (
    Extract, TruncDate, TruncWeek, TruncMonth, TruncQuarter, TruncYear
)

# Group by year
yearly_stats = Post.objects.annotate(
    year=TruncYear('created_at')
).values('year').annotate(
    post_count=Count('id'),
    avg_views=Avg('view_count'),
    total_views=Sum('view_count')
).order_by('year')

# Group by quarter
quarterly_stats = Post.objects.annotate(
    quarter=TruncQuarter('created_at')
).values('quarter').annotate(
    post_count=Count('id'),
    avg_views=Avg('view_count')
).order_by('quarter')

# Group by week
weekly_stats = Post.objects.annotate(
    week=TruncWeek('created_at')
).values('week').annotate(
    post_count=Count('id')
).order_by('week')

# Extract specific date parts
posts_by_weekday = Post.objects.annotate(
    weekday=Extract('created_at', 'week_day')
).values('weekday').annotate(
    count=Count('id')
).order_by('weekday')

posts_by_hour = Post.objects.annotate(
    hour=Extract('created_at', 'hour')
).values('hour').annotate(
    count=Count('id')
).order_by('hour')

# Time-based filtering with aggregation
def get_monthly_stats(year=None):
    """Get monthly post statistics for a specific year"""
    queryset = Post.objects.all()
    
    if year:
        queryset = queryset.filter(created_at__year=year)
    
    return queryset.annotate(
        month=TruncMonth('created_at')
    ).values('month').annotate(
        post_count=Count('id'),
        avg_views=Avg('view_count'),
        total_views=Sum('view_count'),
        unique_authors=Count('author', distinct=True)
    ).order_by('month')

# Usage
stats_2023 = get_monthly_stats(2023)
all_time_monthly = get_monthly_stats()

Rolling Aggregations

from django.db.models import Window
from django.db.models.functions import RowNumber, Rank, DenseRank

# Rolling averages (requires window functions - PostgreSQL)
def get_posts_with_rolling_avg():
    """Get posts with 30-day rolling average views"""
    return Post.objects.annotate(
        rolling_avg_views=Window(
            expression=Avg('view_count'),
            partition_by=[TruncMonth('created_at')],
            order_by='created_at'
        )
    ).order_by('created_at')

# Ranking posts by views within categories
posts_with_rank = Post.objects.annotate(
    view_rank=Window(
        expression=Rank(),
        partition_by=['category'],
        order_by=F('view_count').desc()
    )
).order_by('category', 'view_rank')

# Running totals
posts_with_running_total = Post.objects.annotate(
    running_total_views=Window(
        expression=Sum('view_count'),
        order_by='created_at'
    )
).order_by('created_at')

Complex Aggregation Scenarios

Multi-Level Aggregations

# Author statistics with post and comment aggregations
def get_author_comprehensive_stats():
    """Get comprehensive author statistics"""
    return Author.objects.annotate(
        # Post statistics
        total_posts=Count('posts'),
        published_posts=Count('posts', filter=Q(posts__status='published')),
        draft_posts=Count('posts', filter=Q(posts__status='draft')),
        
        # View statistics
        total_views=Sum('posts__view_count'),
        avg_views_per_post=Avg('posts__view_count'),
        max_post_views=Max('posts__view_count'),
        
        # Comment statistics
        total_comments_received=Count('posts__comments'),
        avg_comments_per_post=Avg('posts__comments'),
        
        # Date statistics
        first_post_date=Min('posts__created_at'),
        latest_post_date=Max('posts__created_at'),
        
        # Engagement metrics
        engagement_score=Sum('posts__view_count') + Count('posts__comments') * 10
        
    ).filter(total_posts__gt=0).order_by('-engagement_score')

# Category performance analysis
def get_category_performance():
    """Analyze category performance metrics"""
    return Category.objects.annotate(
        # Basic counts
        total_posts=Count('posts'),
        published_posts=Count('posts', filter=Q(posts__status='published')),
        
        # Performance metrics
        avg_views_per_post=Avg('posts__view_count'),
        total_category_views=Sum('posts__view_count'),
        
        # Author diversity
        unique_authors=Count('posts__author', distinct=True),
        
        # Time metrics
        days_since_last_post=Case(
            When(posts__isnull=True, then=Value(None)),
            default=timezone.now().date() - Max('posts__created_at__date'),
            output_field=models.IntegerField()
        ),
        
        # Engagement
        comments_per_post=Avg('posts__comments'),
        engagement_rate=Case(
            When(total_category_views=0, then=Value(0.0)),
            default=Count('posts__comments') * 100.0 / Sum('posts__view_count'),
            output_field=models.FloatField()
        )
        
    ).filter(total_posts__gt=0).order_by('-total_category_views')

Dashboard Analytics

def get_dashboard_analytics(days=30):
    """Get comprehensive dashboard analytics"""
    cutoff_date = timezone.now() - timedelta(days=days)
    
    # Overall statistics
    overall_stats = Post.objects.filter(
        created_at__gte=cutoff_date
    ).aggregate(
        total_posts=Count('id'),
        total_views=Sum('view_count'),
        total_comments=Count('comments'),
        avg_views_per_post=Avg('view_count'),
        unique_authors=Count('author', distinct=True)
    )
    
    # Top performing posts
    top_posts = Post.objects.filter(
        created_at__gte=cutoff_date
    ).annotate(
        comment_count=Count('comments'),
        engagement_score=F('view_count') + Count('comments') * 5
    ).order_by('-engagement_score')[:10]
    
    # Author leaderboard
    top_authors = Author.objects.annotate(
        recent_posts=Count(
            'posts',
            filter=Q(posts__created_at__gte=cutoff_date)
        ),
        recent_views=Sum(
            'posts__view_count',
            filter=Q(posts__created_at__gte=cutoff_date)
        )
    ).filter(recent_posts__gt=0).order_by('-recent_views')[:10]
    
    # Category performance
    category_performance = Category.objects.annotate(
        recent_posts=Count(
            'posts',
            filter=Q(posts__created_at__gte=cutoff_date)
        ),
        recent_views=Sum(
            'posts__view_count',
            filter=Q(posts__created_at__gte=cutoff_date)
        )
    ).filter(recent_posts__gt=0).order_by('-recent_views')
    
    # Daily trends
    daily_trends = Post.objects.filter(
        created_at__gte=cutoff_date
    ).annotate(
        date=TruncDate('created_at')
    ).values('date').annotate(
        posts_count=Count('id'),
        total_views=Sum('view_count'),
        total_comments=Count('comments')
    ).order_by('date')
    
    return {
        'overall_stats': overall_stats,
        'top_posts': top_posts,
        'top_authors': top_authors,
        'category_performance': category_performance,
        'daily_trends': list(daily_trends)
    }

# Content performance analysis
def analyze_content_performance():
    """Analyze content performance patterns"""
    
    # Performance by post length
    length_performance = Post.objects.annotate(
        content_length=Length('content'),
        length_category=Case(
            When(content_length__lt=500, then=Value('Short')),
            When(content_length__lt=1500, then=Value('Medium')),
            When(content_length__lt=3000, then=Value('Long')),
            default=Value('Very Long'),
            output_field=models.CharField()
        )
    ).values('length_category').annotate(
        avg_views=Avg('view_count'),
        avg_comments=Avg('comments'),
        post_count=Count('id')
    ).order_by('length_category')
    
    # Performance by publication time
    time_performance = Post.objects.annotate(
        hour=Extract('published_at', 'hour'),
        weekday=Extract('published_at', 'week_day')
    ).values('hour', 'weekday').annotate(
        avg_views=Avg('view_count'),
        post_count=Count('id')
    ).order_by('weekday', 'hour')
    
    # Tag effectiveness
    tag_performance = Tag.objects.annotate(
        post_count=Count('posts'),
        avg_views=Avg('posts__view_count'),
        total_views=Sum('posts__view_count')
    ).filter(post_count__gte=5).order_by('-avg_views')
    
    return {
        'length_performance': list(length_performance),
        'time_performance': list(time_performance),
        'tag_performance': tag_performance
    }

Performance Optimization for Aggregations

# Efficient aggregation patterns
class OptimizedAnalytics:
    """Optimized analytics queries"""
    
    @staticmethod
    def get_cached_stats(cache_key='blog_stats', timeout=3600):
        """Cache expensive aggregation queries"""
        from django.core.cache import cache
        
        stats = cache.get(cache_key)
        if stats is None:
            stats = Post.objects.aggregate(
                total_posts=Count('id'),
                total_views=Sum('view_count'),
                avg_views=Avg('view_count'),
                total_authors=Count('author', distinct=True)
            )
            cache.set(cache_key, stats, timeout)
        
        return stats
    
    @staticmethod
    def get_paginated_author_stats(page=1, per_page=20):
        """Paginated author statistics"""
        offset = (page - 1) * per_page
        
        return Author.objects.annotate(
            post_count=Count('posts'),
            total_views=Sum('posts__view_count')
        ).filter(
            post_count__gt=0
        ).order_by('-total_views')[offset:offset + per_page]
    
    @staticmethod
    def get_lightweight_trends(days=30):
        """Lightweight trend analysis"""
        cutoff_date = timezone.now() - timedelta(days=days)
        
        return Post.objects.filter(
            created_at__gte=cutoff_date
        ).values('created_at__date').annotate(
            count=Count('id')
        ).order_by('created_at__date')

# Usage
stats = OptimizedAnalytics.get_cached_stats()
author_page_1 = OptimizedAnalytics.get_paginated_author_stats(page=1)
trends = OptimizedAnalytics.get_lightweight_trends(days=7)

Django's aggregation framework enables you to perform complex analytical queries efficiently at the database level, providing the foundation for building comprehensive reporting and analytics features in your applications.