Django provides multiple approaches for implementing search functionality, from simple text-based searches to full-text search capabilities. Understanding these options enables you to choose the right search solution for your application's needs.
from django.db import models
from django.db.models import Q
from myapp.models import Post, Author
# Basic contains search
def simple_search(query):
    """Return posts whose title or content contains ``query``.

    The match is case-insensitive (``icontains``) and the result is a lazy
    queryset with ``distinct()`` applied.
    """
    in_title = Q(title__icontains=query)
    in_content = Q(content__icontains=query)
    return Post.objects.filter(in_title | in_content).distinct()
# Multi-field search
def multi_field_search(query):
    """Return posts where any of several fields contains ``query``.

    The fields checked are the post title and content, the author's first
    and last name, and the names of attached tags. ``distinct()`` collapses
    the duplicate rows the author/tag joins can produce.
    """
    lookups = (
        'title__icontains',
        'content__icontains',
        'author__first_name__icontains',
        'author__last_name__icontains',
        'tags__name__icontains',
    )
    any_field = Q()
    for lookup in lookups:
        # OR every lookup together, in the order listed above.
        any_field |= Q(**{lookup: query})
    return Post.objects.filter(any_field).distinct()
# Word-based search
def word_search(query):
    """Return posts whose title or content contains any word of ``query``.

    ``query`` is split on whitespace and each word is matched
    case-insensitively. A post qualifies when at least one word is found.

    An empty or whitespace-only query returns an empty queryset. Without
    that guard the accumulated ``Q()`` stays empty and ``filter(Q())``
    would return every post.
    """
    words = query.split()
    if not words:
        return Post.objects.none()

    any_word = Q()
    for word in words:
        any_word |= Q(title__icontains=word) | Q(content__icontains=word)
    return Post.objects.filter(any_word).distinct()
# Advanced text search with ranking
def ranked_search(query):
    """Return matching posts ordered by a coarse relevance score.

    A post scores 3 when its title contains ``query`` and 1 when only its
    content does. ``Case`` uses the first ``When`` that matches, so a post
    matching both still scores 3. Ties are broken by newest ``created_at``.
    """
    # Only the names actually used are imported; the unused ``F`` is gone.
    from django.db.models import Case, IntegerField, When

    relevance = Case(
        # Title matches get the higher score.
        When(title__icontains=query, then=3),
        # Content-only matches get the lower score.
        When(content__icontains=query, then=1),
        default=0,
        output_field=IntegerField(),
    )
    return (
        Post.objects
        .filter(Q(title__icontains=query) | Q(content__icontains=query))
        .annotate(relevance=relevance)
        .order_by('-relevance', '-created_at')
        .distinct()
    )
import re
from django.db.models import Q, F, Value, CharField
from django.db.models.functions import Concat
class SearchManager:
    """Collection of ``icontains``-based search helpers.

    All helpers are static methods. Unless stated otherwise they return
    lazy querysets.
    """

    @staticmethod
    def _is_blank(value):
        """Return True for ``None`` and for empty strings/collections.

        ``False`` and ``0`` are deliberately not blank, so callers can
        filter on them.
        """
        if value is None:
            return True
        try:
            return len(value) == 0
        except TypeError:
            # Scalars such as bool/int/date have no length: never blank.
            return False

    @staticmethod
    def fuzzy_search(query, model=Post):
        """Return rows matching the whole phrase or any single word.

        Args:
            query: Free-text search string; split on whitespace into words.
            model: Model class to search; must have ``title`` and
                ``content`` fields.

        Returns:
            A queryset annotated with ``exact_match`` (1 when the whole
            phrase occurs in title or content, else 0). Phrase matches
            come first, with ``pk`` as tie-breaker. An empty or
            whitespace-only query returns an empty queryset.
        """
        from django.db.models import Case, IntegerField, When

        words = query.split()
        if not words:
            return model.objects.none()

        phrase_match = Q(title__icontains=query) | Q(content__icontains=query)
        word_match = Q()
        for word in words:
            word_match |= Q(title__icontains=word) | Q(content__icontains=word)

        # union() gives no ordering guarantee, so "exact matches first" is
        # expressed as an explicit annotation that the query sorts on.
        return (
            model.objects
            .filter(phrase_match | word_match)
            .annotate(
                exact_match=Case(
                    When(phrase_match, then=1),
                    default=0,
                    output_field=IntegerField(),
                )
            )
            .order_by('-exact_match', 'pk')
        )

    @staticmethod
    def search_with_filters(query, **filters):
        """Search title/content and narrow the result with extra filters.

        Args:
            query: Text matched case-insensitively against title/content.
            **filters: ORM lookups (e.g. ``status='published'``). Entries
                whose value is ``None`` or an empty string/collection are
                skipped. ``False`` and ``0`` are applied.

        Returns:
            A distinct queryset of matching posts.
        """
        condition = Q(title__icontains=query) | Q(content__icontains=query)
        for lookup, value in filters.items():
            if not SearchManager._is_blank(value):
                condition &= Q(**{lookup: value})
        return Post.objects.filter(condition).distinct()

    @staticmethod
    def autocomplete_search(query, field='title', limit=10):
        """Return up to ``limit`` distinct ``field`` values starting with ``query``.

        The prefix match is case-insensitive (``istartswith``).
        """
        prefix_lookup = {f'{field}__istartswith': query}
        values = Post.objects.filter(**prefix_lookup).values_list(field, flat=True)
        return values.distinct()[:limit]

    @staticmethod
    def search_suggestions(query):
        """Return up to five title, tag and author suggestions each.

        Returns:
            A dict with the keys ``'titles'``, ``'tags'`` and ``'authors'``,
            each mapping to a list of strings containing ``query``
            (case-insensitive).
        """
        titles = Post.objects.filter(
            title__icontains=query
        ).values_list('title', flat=True)[:5]

        # This module imports no Tag model, so resolve it through the
        # ``tags`` relation that Post is already filtered on elsewhere here.
        tag_model = Post._meta.get_field('tags').related_model
        tags = tag_model.objects.filter(
            name__icontains=query
        ).values_list('name', flat=True)[:5]

        # Match against "first last" so multi-word queries can hit.
        authors = Author.objects.annotate(
            full_name=Concat('first_name', Value(' '), 'last_name')
        ).filter(
            full_name__icontains=query
        ).values_list('full_name', flat=True)[:5]

        return {
            'titles': list(titles),
            'tags': list(tags),
            'authors': list(authors),
        }
# Usage examples
# Match posts on the whole phrase or on any individual word.
results = SearchManager.fuzzy_search('django tutorial')
# Keyword arguments after the query are applied as extra ORM filters.
filtered_results = SearchManager.search_with_filters(
'python',
category__slug='programming',
status='published'
)
# Case-insensitive prefix match on the default field ('title'), limit 10.
suggestions = SearchManager.autocomplete_search('djan')
# PostgreSQL-specific full-text search
from django.contrib.postgres.search import (
SearchVector, SearchQuery, SearchRank, SearchHeadline
)
class PostgreSQLSearch:
    """Full-text search helpers built on ``django.contrib.postgres.search``."""

    @staticmethod
    def _title_content_vector(**vector_options):
        """Build the title (weight A) + content (weight B) search vector."""
        title_part = SearchVector('title', weight='A', **vector_options)
        content_part = SearchVector('content', weight='B', **vector_options)
        return title_part + content_part

    @staticmethod
    def basic_full_text_search(query):
        """Return posts whose weighted title/content vector matches ``query``."""
        vector = PostgreSQLSearch._title_content_vector()
        tsquery = SearchQuery(query)
        annotated = Post.objects.annotate(search=vector)
        return annotated.filter(search=tsquery)

    @staticmethod
    def ranked_full_text_search(query):
        """Return matching posts annotated with ``rank``, best rank first."""
        vector = PostgreSQLSearch._title_content_vector()
        tsquery = SearchQuery(query)
        annotated = Post.objects.annotate(
            search=vector,
            rank=SearchRank(vector, tsquery),
        )
        return annotated.filter(search=tsquery).order_by('-rank')

    @staticmethod
    def advanced_search_with_headlines(query):
        """Return ranked matches that also carry a ``headline`` excerpt.

        ``headline`` is a ``SearchHeadline`` computed over ``content``.
        """
        vector = PostgreSQLSearch._title_content_vector()
        tsquery = SearchQuery(query)
        annotated = Post.objects.annotate(
            search=vector,
            rank=SearchRank(vector, tsquery),
            headline=SearchHeadline('content', tsquery),
        )
        return annotated.filter(search=tsquery).order_by('-rank')

    @staticmethod
    def multi_language_search(query, language='english'):
        """Return ranked matches using ``language`` as the text-search config.

        The same config is passed to both the vector and the query.
        """
        vector = PostgreSQLSearch._title_content_vector(config=language)
        tsquery = SearchQuery(query, config=language)
        annotated = Post.objects.annotate(
            search=vector,
            rank=SearchRank(vector, tsquery),
        )
        return annotated.filter(search=tsquery).order_by('-rank')

    @staticmethod
    def boolean_search(query):
        """Return ranked matches for a raw query with boolean operators.

        Example: ``"python & django | web"``. The query is passed with
        ``search_type='raw'``, and the vector here is unweighted.
        """
        vector = SearchVector('title') + SearchVector('content')
        tsquery = SearchQuery(query, search_type='raw')
        annotated = Post.objects.annotate(
            search=vector,
            rank=SearchRank(vector, tsquery),
        )
        return annotated.filter(search=tsquery).order_by('-rank')
# Create search index for better performance
# In your migration:
"""
from django.contrib.postgres.operations import TrigramExtension
from django.contrib.postgres.indexes import GinIndex
from django.db import migrations


class Migration(migrations.Migration):
    operations = [
        TrigramExtension(),
        migrations.RunSQL(
            "CREATE INDEX post_search_idx ON blog_post USING GIN (to_tsvector('english', title || ' ' || content));"
        ),
    ]
"""
# Usage
# Unranked match against the weighted title/content search vector.
basic_results = PostgreSQLSearch.basic_full_text_search('django tutorial')
# Same match, annotated with `rank` and ordered best-first.
ranked_results = PostgreSQLSearch.ranked_full_text_search('python web development')
# Ranked results that additionally carry a `headline` built from content.
with_headlines = PostgreSQLSearch.advanced_search_with_headlines('machine learning')
# Cross-database search implementation
class UniversalSearch:
    """Search helpers that rely only on portable ORM lookups."""

    @staticmethod
    def weighted_search(query):
        """Return posts matching any word of ``query``, scored by match site.

        Scores, taken from the first matching rule:
            100 - title equals the whole query (case-insensitive)
             50 - title contains at least one query word
             25 - content contains at least one query word

        Results are ordered by score, then newest ``created_at``. An empty
        or whitespace-only query returns an empty queryset: with no words
        the conditions stay empty, and ``When`` rejects an empty ``Q()``
        with ``ValueError``.
        """
        from django.db.models import Case, When, IntegerField, Q

        words = query.split()
        if not words:
            return Post.objects.none()

        # One OR-chain per field: a field qualifies if it holds ANY word.
        title_conditions = Q()
        content_conditions = Q()
        for word in words:
            title_conditions |= Q(title__icontains=word)
            content_conditions |= Q(content__icontains=word)

        relevance = Case(
            # Exact title match.
            When(title__iexact=query, then=100),
            # Title contains at least one word.
            When(title_conditions, then=50),
            # Content contains at least one word.
            When(content_conditions, then=25),
            default=0,
            output_field=IntegerField(),
        )
        return (
            Post.objects
            .filter(title_conditions | content_conditions)
            .annotate(relevance_score=relevance)
            .filter(relevance_score__gt=0)
            .order_by('-relevance_score', '-created_at')
        )

    @staticmethod
    def faceted_search(query, facets=None):
        """Search title/content, then narrow by the selected facets.

        Args:
            query: Text matched case-insensitively against title/content.
            facets: Optional dict. ``'category'``, ``'author'`` and
                ``'tags'`` map to iterables of slugs; ``'date_range'``
                maps to a ``(start, end)`` pair applied to ``created_at``.

        Returns:
            A distinct queryset of matching posts.
        """
        results = Post.objects.filter(
            Q(title__icontains=query) | Q(content__icontains=query)
        )
        if facets:
            slug_facets = (
                ('category', 'category__slug__in'),
                ('author', 'author__slug__in'),
                ('tags', 'tags__slug__in'),
            )
            for facet_name, lookup in slug_facets:
                if facet_name in facets:
                    results = results.filter(**{lookup: facets[facet_name]})
            if 'date_range' in facets:
                start_date, end_date = facets['date_range']
                results = results.filter(created_at__range=(start_date, end_date))
        # The tags join can repeat a post once per matching tag.
        return results.distinct()

    @staticmethod
    def get_search_facets(query):
        """Return per-category, per-author and per-tag counts for ``query``.

        Returns:
            A dict with ``'categories'``, ``'authors'`` and ``'tags'``
            (lists of dicts holding the grouped fields plus ``count``,
            largest first) and ``'total_results'``.
        """
        # Count is not imported at module level; import it locally, as the
        # other helpers in this file do.
        from django.db.models import Count

        base_results = Post.objects.filter(
            Q(title__icontains=query) | Q(content__icontains=query)
        )

        def grouped_counts(*fields):
            # GROUP BY the given fields and count the posts in each group.
            return list(
                base_results.values(*fields)
                .annotate(count=Count('id'))
                .order_by('-count')
            )

        return {
            'categories': grouped_counts('category__slug', 'category__name'),
            'authors': grouped_counts(
                'author__slug', 'author__first_name', 'author__last_name'
            ),
            'tags': grouped_counts('tags__slug', 'tags__name'),
            'total_results': base_results.count(),
        }
# Elasticsearch integration example
from elasticsearch import Elasticsearch
from django.conf import settings
class ElasticsearchSearch:
    """Index and query blog posts in an Elasticsearch index."""

    # Maximum number of content characters returned per search result.
    EXCERPT_LENGTH = 200

    def __init__(self):
        """Connect to ``settings.ELASTICSEARCH_URL`` and select the index."""
        self.es = Elasticsearch([settings.ELASTICSEARCH_URL])
        self.index_name = 'blog_posts'

    @staticmethod
    def _excerpt(text, limit):
        """Return ``text`` cut to ``limit`` chars, with '...' only if cut."""
        if len(text) <= limit:
            return text
        return text[:limit] + '...'

    def index_post(self, post):
        """Index (or re-index) ``post`` under its primary key."""
        doc = {
            'id': post.id,
            'title': post.title,
            'content': post.content,
            'author': post.author.get_full_name(),
            'category': post.category.name if post.category else None,
            'tags': [tag.name for tag in post.tags.all()],
            'created_at': post.created_at,
            'view_count': post.view_count,
            'status': post.status,
        }
        self.es.index(index=self.index_name, id=post.id, body=doc)

    def search_posts(self, query, filters=None, size=20, from_=0):
        """Run a fuzzy multi-field search and return formatted results.

        Args:
            query: Text matched against title (boosted x3), content,
                author and tags.
            filters: Optional dict. ``'category'`` and ``'author'`` are
                exact term filters; ``'date_range'`` is a dict with
                ``'start'`` and ``'end'`` applied to ``created_at``.
            size: Page size.
            from_: Offset of the first hit.

        Returns:
            The dict produced by ``format_search_results``.
        """
        filter_clauses = []
        if filters:
            for name in ('category', 'author'):
                if name in filters:
                    filter_clauses.append(
                        {'term': {f'{name}.keyword': filters[name]}}
                    )
            if 'date_range' in filters:
                filter_clauses.append({
                    'range': {
                        'created_at': {
                            'gte': filters['date_range']['start'],
                            'lte': filters['date_range']['end'],
                        }
                    }
                })

        search_body = {
            'query': {
                'bool': {
                    'must': [
                        {
                            'multi_match': {
                                'query': query,
                                'fields': ['title^3', 'content', 'author', 'tags'],
                                'type': 'best_fields',
                                'fuzziness': 'AUTO',
                            }
                        }
                    ],
                    'filter': filter_clauses,
                }
            },
            'highlight': {
                'fields': {
                    'title': {},
                    'content': {'fragment_size': 150},
                }
            },
            # Best score first; newest first among equal scores.
            'sort': [
                {'_score': {'order': 'desc'}},
                {'created_at': {'order': 'desc'}},
            ],
            'size': size,
            'from': from_,
        }

        response = self.es.search(index=self.index_name, body=search_body)
        return self.format_search_results(response)

    def format_search_results(self, response):
        """Flatten a raw search response into plain dicts.

        Returns:
            A dict with ``'results'`` (one dict per hit: id, title, content
            excerpt, author, score, highlights), ``'total'`` and
            ``'max_score'``. The excerpt ends in '...' only when the
            content was actually truncated.
        """
        results = []
        for hit in response['hits']['hits']:
            source = hit['_source']
            results.append({
                'id': source['id'],
                'title': source['title'],
                'content': self._excerpt(source['content'], self.EXCERPT_LENGTH),
                'author': source['author'],
                'score': hit['_score'],
                'highlights': hit.get('highlight', {}),
            })
        return {
            'results': results,
            'total': response['hits']['total']['value'],
            'max_score': response['hits']['max_score'],
        }

    def get_suggestions(self, query):
        """Return completion suggestions for the prefix ``query``.

        NOTE(review): this reads a ``title_suggest`` completion field, but
        ``index_post`` does not write one. Presumably the index mapping
        populates it - confirm, otherwise no suggestions will be returned.
        """
        search_body = {
            'suggest': {
                'title_suggest': {
                    'prefix': query,
                    'completion': {
                        'field': 'title_suggest',
                        'size': 10,
                    },
                }
            }
        }
        response = self.es.search(index=self.index_name, body=search_body)
        options = response['suggest']['title_suggest'][0]['options']
        return [option['text'] for option in options]
# Django management command to index posts
"""
# management/commands/index_posts.py
from django.core.management.base import BaseCommand
from myapp.models import Post
from myapp.search import ElasticsearchSearch


class Command(BaseCommand):
    help = 'Index all posts in Elasticsearch'

    def handle(self, *args, **options):
        es_search = ElasticsearchSearch()
        posts = Post.objects.filter(status='published')
        total = posts.count()
        for i, post in enumerate(posts, 1):
            es_search.index_post(post)
            if i % 100 == 0:
                self.stdout.write(f'Indexed {i}/{total} posts')
        self.stdout.write(
            self.style.SUCCESS(f'Successfully indexed {total} posts')
        )
"""
# Apache Solr integration
import pysolr
from django.conf import settings
class SolrSearch:
    """Index and query blog posts in Apache Solr via ``pysolr``."""

    # Prefix put on Solr document ids by ``index_post``.
    ID_PREFIX = 'post_'
    # Maximum number of content characters returned per search result.
    EXCERPT_LENGTH = 200

    def __init__(self):
        """Connect to ``settings.SOLR_URL`` with commit-on-write enabled."""
        self.solr = pysolr.Solr(settings.SOLR_URL, always_commit=True)

    @staticmethod
    def _quote(value):
        """Return ``value`` as a double-quoted Solr phrase.

        Backslashes and double quotes are escaped so a filter value cannot
        terminate the phrase and inject query syntax.
        """
        escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    @staticmethod
    def _excerpt(text, limit):
        """Return ``text`` cut to ``limit`` chars, with '...' only if cut."""
        if len(text) <= limit:
            return text
        return text[:limit] + '...'

    def index_post(self, post):
        """Add ``post`` to Solr under the id ``post_<pk>``."""
        doc = {
            'id': f'{self.ID_PREFIX}{post.id}',
            'title': post.title,
            'content': post.content,
            'author': post.author.get_full_name(),
            'category': post.category.name if post.category else None,
            'tags': [tag.name for tag in post.tags.all()],
            'created_at': post.created_at.isoformat(),
            'view_count': post.view_count,
            'status': post.status,
        }
        self.solr.add([doc])

    def search_posts(self, query, filters=None, rows=20, start=0):
        """Search Solr and return formatted results.

        Args:
            query: Solr query string, passed through as ``q``.
            filters: Optional dict; ``'category'``, ``'author'`` and
                ``'status'`` become exact-phrase filter queries (``fq``).
            rows: Page size.
            start: Offset of the first document.

        Returns:
            The dict produced by ``format_solr_results``.
        """
        params = {
            'q': query,
            'rows': rows,
            'start': start,
            'hl': 'true',
            'hl.fl': 'title,content',
            'hl.simple.pre': '<mark>',
            'hl.simple.post': '</mark>',
            'sort': 'score desc, created_at desc',
        }

        if filters:
            fq = [
                f'{field}:{self._quote(filters[field])}'
                for field in ('category', 'author', 'status')
                if field in filters
            ]
            if fq:
                params['fq'] = fq

        results = self.solr.search(**params)
        return self.format_solr_results(results)

    def format_solr_results(self, results):
        """Flatten a pysolr result object into plain dicts.

        Returns:
            A dict with ``'results'`` (one dict per document: id without
            the ``post_`` prefix, title, content excerpt, author, score),
            ``'total'`` and ``'highlighting'``. The excerpt ends in '...'
            only when the content was actually truncated.
        """
        formatted_results = []
        for doc in results.docs:
            doc_id = doc['id']
            # Strip only the leading prefix, not later occurrences of it.
            if doc_id.startswith(self.ID_PREFIX):
                doc_id = doc_id[len(self.ID_PREFIX):]
            formatted_results.append({
                'id': doc_id,
                'title': doc['title'],
                'content': self._excerpt(doc['content'], self.EXCERPT_LENGTH),
                'author': doc['author'],
                'score': doc.get('score', 0),
            })
        return {
            'results': formatted_results,
            'total': results.hits,
            'highlighting': results.highlighting,
        }
# Search performance optimization techniques
class OptimizedSearch:
    """Search helpers focused on query cost: caching, paging, aggregation."""

    @staticmethod
    def create_search_indexes():
        """Document the indexes that speed up search; executes nothing.

        This function has no effect and returns ``None``. The statements
        below are reference SQL to apply through a migration or by hand.
        PostgreSQL cannot run ``CREATE INDEX CONCURRENTLY`` inside a
        transaction block.
        """
        # PostgreSQL GIN indexes for full-text search:
        #
        #   CREATE INDEX CONCURRENTLY post_title_gin_idx
        #   ON blog_post USING GIN (to_tsvector('english', title));
        #   CREATE INDEX CONCURRENTLY post_content_gin_idx
        #   ON blog_post USING GIN (to_tsvector('english', content));
        #   CREATE INDEX CONCURRENTLY post_search_gin_idx
        #   ON blog_post USING GIN (to_tsvector('english', title || ' ' || content));
        #
        # Regular indexes for filtering:
        #
        #   CREATE INDEX CONCURRENTLY post_status_created_idx
        #   ON blog_post (status, created_at DESC);
        #   CREATE INDEX CONCURRENTLY post_author_status_idx
        #   ON blog_post (author_id, status);

    @staticmethod
    def cached_search(query, cache_timeout=300):
        """Return title/content matches for ``query``, cached by query text.

        Args:
            query: Text matched case-insensitively against title/content.
            cache_timeout: Cache lifetime in seconds.

        Returns:
            A queryset of posts with author/category selected and tags
            prefetched, taken from the cache when present.
        """
        from django.core.cache import cache
        import hashlib

        # Hash the query so arbitrary text yields a short, safe cache key.
        cache_key = f"search_{hashlib.md5(query.encode()).hexdigest()}"

        results = cache.get(cache_key)
        if results is None:
            results = Post.objects.filter(
                Q(title__icontains=query) | Q(content__icontains=query)
            ).select_related('author', 'category').prefetch_related('tags')
            # NOTE(review): cache backends that pickle values will evaluate
            # the queryset here, so the stored entry holds the fetched rows.
            cache.set(cache_key, results, cache_timeout)
        return results

    @staticmethod
    def paginated_search(query, page=1, per_page=20):
        """Return one page of title/content matches for ``query``.

        Args:
            query: Text matched case-insensitively against title/content.
            page: 1-based page number; ``Paginator.get_page`` clamps
                invalid values.
            per_page: Number of posts per page.

        Returns:
            A dict with ``'results'`` (the page's objects), ``'page'`` (the
            ``Page`` object) and ``'total_results'``.
        """
        from django.core.paginator import Paginator

        queryset = (
            Post.objects
            .filter(Q(title__icontains=query) | Q(content__icontains=query))
            .select_related('author', 'category')
            .only(
                'id', 'title', 'slug', 'excerpt', 'created_at',
                'author__username', 'category__name',
            )
            # Pagination needs a total order: without one, rows can repeat
            # or vanish between pages and Paginator warns about it.
            .order_by('-created_at', '-pk')
        )

        paginator = Paginator(queryset, per_page)
        page_obj = paginator.get_page(page)
        return {
            'results': page_obj.object_list,
            'page': page_obj,
            'total_results': paginator.count,
        }

    @staticmethod
    def search_with_aggregations(query):
        """Return matches for ``query`` plus per-category/author counts.

        Returns:
            A dict with ``'results'`` (a lazy queryset), ``'category_facets'``
            and ``'author_facets'`` (lists of dicts with a ``count`` key,
            largest first) and ``'total_count'``.
        """
        from django.db.models import Count

        base_query = Post.objects.filter(
            Q(title__icontains=query) | Q(content__icontains=query)
        )

        category_counts = (
            base_query.values('category__name')
            .annotate(count=Count('id'))
            .order_by('-count')
        )
        author_counts = (
            base_query.values('author__username')
            .annotate(count=Count('id'))
            .order_by('-count')
        )
        return {
            'results': base_query.select_related('author', 'category'),
            'category_facets': list(category_counts),
            'author_facets': list(author_counts),
            'total_count': base_query.count(),
        }
# Search analytics
class SearchAnalytics:
    """Record search queries and report on them via ``SearchLog``."""

    @staticmethod
    def _cutoff(days):
        """Return the aware datetime ``days`` days before now."""
        # Imported locally, as the other helpers in this file do, because
        # neither name is imported at module level.
        from datetime import timedelta
        from django.utils import timezone

        return timezone.now() - timedelta(days=days)

    @staticmethod
    def log_search(query, user=None, results_count=0):
        """Store one search event.

        Args:
            query: The search text; truncated to the ``query`` column's
                ``max_length`` so overlong input cannot fail the insert.
            user: The searching user, or ``None``.
            results_count: Number of results the search returned.
        """
        max_length = SearchLog._meta.get_field('query').max_length
        # ``timestamp`` is auto_now_add, so the model sets it on insert;
        # passing a value here would be ignored.
        SearchLog.objects.create(
            query=query[:max_length],
            user=user,
            results_count=results_count,
        )

    @staticmethod
    def get_popular_searches(days=30, limit=10):
        """Return the ``limit`` most frequent queries of the last ``days`` days.

        Returns:
            A queryset of dicts with ``query`` and ``search_count``, most
            frequent first.
        """
        from django.db.models import Count

        recent = SearchLog.objects.filter(
            timestamp__gte=SearchAnalytics._cutoff(days)
        )
        return (
            recent.values('query')
            .annotate(search_count=Count('id'))
            .order_by('-search_count')[:limit]
        )

    @staticmethod
    def get_zero_result_searches(days=7):
        """Return queries of the last ``days`` days that found nothing.

        Returns:
            A queryset of dicts with ``query`` and ``count``, most frequent
            first.
        """
        from django.db.models import Count

        empty_handed = SearchLog.objects.filter(
            timestamp__gte=SearchAnalytics._cutoff(days),
            results_count=0,
        )
        return (
            empty_handed.values('query')
            .annotate(count=Count('id'))
            .order_by('-count')
        )
# Search log model
class SearchLog(models.Model):
    """One recorded search: what was asked, by whom, and how many hits."""

    # The search text as entered.
    query = models.CharField(max_length=255)
    # Reference the configured user model rather than a bare ``User`` name,
    # which this module never imports. The row survives user deletion.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
    # Number of results the search returned.
    results_count = models.PositiveIntegerField(default=0)
    # Set automatically when the row is created.
    timestamp = models.DateTimeField(auto_now_add=True)

    class Meta:
        indexes = [
            # Supports "popular searches since <date>" grouping by query.
            models.Index(fields=['timestamp', 'query']),
            # Supports "zero-result searches since <date>".
            models.Index(fields=['results_count', 'timestamp']),
        ]

    def __str__(self):
        return f'{self.query} ({self.results_count} results)'
Implementing effective search functionality requires choosing the right approach based on your application's needs, data volume, and performance requirements. From simple text filtering to full-text search engines, Django provides the flexibility to build sophisticated search experiences.
Aggregation
Django's aggregation framework provides powerful tools for performing calculations across multiple database records. Understanding aggregation functions, grouping, and annotation enables you to generate reports, statistics, and analytical data efficiently at the database level.
Raw SQL Queries
While Django's ORM handles most database operations elegantly, there are times when you need the power and flexibility of raw SQL. Understanding how to safely execute raw SQL queries enables you to optimize performance, use database-specific features, and handle complex operations that are difficult to express with the ORM.