Tablespaces provide a way to define locations in the file system where database objects can be stored. While primarily a PostgreSQL and Oracle feature, understanding tablespaces helps optimize database performance by strategically placing data on different storage devices.
# Tablespaces allow you to:
# 1. Store large tables on faster storage (SSD)
# 2. Separate indexes from table data
# 3. Distribute I/O load across multiple disks
# 4. Implement storage tiering strategies
# 5. Manage disk space more effectively
# PostgreSQL tablespace example in Django settings
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'myapp_db',
        'USER': 'postgres',
        'PASSWORD': 'password',
        'HOST': 'localhost',
        'PORT': '5432',
        'OPTIONS': {
            # Extra libpq command-line options sent at connect time.
            # "-c default_tablespace=..." makes every object created in
            # this session land in that tablespace unless one is given
            # explicitly (the tablespace must already exist).
            'options': '-c default_tablespace=fast_storage'
        },
    }
}
# Tablespace configuration in PostgreSQL
# NOTE: the bare module-level string below is documentation only; it is
# never executed against the database.
"""
-- Create tablespaces (run as PostgreSQL superuser)
CREATE TABLESPACE fast_storage LOCATION '/mnt/ssd/postgresql';
CREATE TABLESPACE archive_storage LOCATION '/mnt/hdd/postgresql';
CREATE TABLESPACE index_storage LOCATION '/mnt/nvme/postgresql';
"""
# models.py
from django.db import models
class HighVolumeData(models.Model):
    """High-volume event rows placed on the fast (SSD) tablespace.

    The table lives in ``fast_storage`` while both indexes are placed on
    ``index_storage``, so heap I/O and index I/O hit different devices.
    """

    # Set once at insert time; rows are effectively append-only.
    timestamp = models.DateTimeField(auto_now_add=True)
    event_type = models.CharField(max_length=50)
    user_id = models.IntegerField()
    data = models.JSONField()

    class Meta:
        # Store table in fast storage tablespace
        db_tablespace = 'fast_storage'
        # Indexes can use different tablespace
        indexes = [
            models.Index(
                fields=['timestamp', 'event_type'],
                name='idx_timestamp_event',
                db_tablespace='index_storage'
            ),
            models.Index(
                fields=['user_id'],
                name='idx_user_id',
                db_tablespace='index_storage'
            ),
        ]
class ArchiveData(models.Model):
    """Snapshot rows for archived records, kept on slow/cheap storage.

    Both the table and its index use ``archive_storage`` so archived
    data stays entirely off the fast tier.
    """

    # Id of the row this snapshot was taken from (plain integer, not a FK).
    original_id = models.IntegerField()
    archived_at = models.DateTimeField(auto_now_add=True)
    data_snapshot = models.JSONField()

    class Meta:
        # Store in cheaper, slower storage
        db_tablespace = 'archive_storage'
        indexes = [
            models.Index(
                fields=['original_id'],
                name='idx_archive_original_id',
                db_tablespace='archive_storage'  # Keep indexes with data
            ),
        ]
class UserProfile(models.Model):
    """Per-user profile kept on fast storage (read on most requests)."""

    # NOTE(review): hard-codes 'auth.User'; projects with a custom user
    # model would need settings.AUTH_USER_MODEL instead — confirm.
    user = models.OneToOneField('auth.User', on_delete=models.CASCADE)
    avatar = models.ImageField(upload_to='avatars/')
    bio = models.TextField()
    preferences = models.JSONField(default=dict)
    # Frequently accessed data
    last_login_ip = models.GenericIPAddressField(null=True)
    login_count = models.PositiveIntegerField(default=0)

    class Meta:
        # Store user profiles in fast storage
        db_tablespace = 'fast_storage'
        indexes = [
            # Frequently queried index on fast storage
            models.Index(
                fields=['user'],
                name='idx_profile_user',
                db_tablespace='fast_storage'
            ),
        ]
class LargeDocument(models.Model):
    """Document with a large text body; indexes split off to NVMe storage.

    The table itself stays in the default tablespace (no
    ``db_tablespace`` in Meta); only the two indexes are redirected.
    """

    title = models.CharField(max_length=200)
    # NOTE(review): hard-codes 'auth.User'; see UserProfile remark about
    # settings.AUTH_USER_MODEL.
    author = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    created_at = models.DateTimeField(auto_now_add=True)
    # Large content field - could be stored separately
    content = models.TextField()
    # Metadata fields
    file_size = models.PositiveIntegerField()
    mime_type = models.CharField(max_length=100)

    class Meta:
        # Main table in default tablespace
        # Large content could be moved to separate tablespace via partitioning
        indexes = [
            # Title search index on the dedicated index tablespace
            models.Index(
                fields=['title'],
                name='idx_document_title',
                db_tablespace='index_storage'
            ),
            # Author+date index, also on the index tablespace
            models.Index(
                fields=['author', 'created_at'],
                name='idx_document_author_date',
                db_tablespace='index_storage'
            ),
        ]
# management/commands/manage_tablespaces.py
from django.core.management.base import BaseCommand
from django.db import connection
class Command(BaseCommand):
    """Management command for tablespace operations.

    Flags may be combined; selected operations run in this order:
    create tablespaces, move old data, analyze usage, rebalance.
    """

    help = 'Manage database tablespaces'

    def add_arguments(self, parser):
        parser.add_argument('--create-tablespaces', action='store_true')
        parser.add_argument('--move-old-data', action='store_true')
        parser.add_argument('--analyze-usage', action='store_true')
        parser.add_argument('--rebalance', action='store_true')

    def handle(self, *args, **options):
        if options['create_tablespaces']:
            self.create_tablespaces()
        if options['move_old_data']:
            self.move_old_data_to_archive()
        if options['analyze_usage']:
            self.analyze_tablespace_usage()
        if options['rebalance']:
            self.rebalance_tablespaces()

    def create_tablespaces(self):
        """Create tablespaces for the different storage tiers.

        PostgreSQL's CREATE TABLESPACE has no IF NOT EXISTS clause (the
        previous version used it and every statement failed), so each
        name is checked against pg_tablespace first.  The directories
        must already exist, be empty, and be owned by the PostgreSQL
        server user; CREATE TABLESPACE requires superuser rights.
        """
        tablespaces = [
            # (name, location) -- hard-coded, trusted values
            ('fast_storage', '/mnt/ssd/postgresql'),     # hot data on SSD
            ('index_storage', '/mnt/nvme/postgresql'),   # indexes on NVMe
            ('archive_storage', '/mnt/hdd/postgresql'),  # cold data on HDD
            ('temp_storage', '/mnt/temp/postgresql'),    # large temp operations
        ]
        with connection.cursor() as cursor:
            for name, location in tablespaces:
                try:
                    cursor.execute(
                        "SELECT 1 FROM pg_tablespace WHERE spcname = %s",
                        [name],
                    )
                    if cursor.fetchone():
                        self.stdout.write(f"Tablespace already exists: {name}")
                        continue
                    # Name/location come from the trusted list above, so
                    # f-string interpolation is safe here (identifiers
                    # cannot be bound as query parameters).
                    command = f"CREATE TABLESPACE {name} LOCATION '{location}'"
                    cursor.execute(command)
                    self.stdout.write(f"Executed: {command}")
                except Exception as e:
                    self.stdout.write(f"Error: {e}")

    def move_old_data_to_archive(self):
        """Move old data to archive tablespace.

        NOTE(review): ALTER TABLE ... SET TABLESPACE relocates the WHOLE
        table and does not accept a WHERE clause, so both statements
        below are rejected by PostgreSQL (the error is caught and
        logged).  Moving only old rows requires range partitioning (see
        TablespaceOptimizer.create_partitioned_table_with_tablespaces)
        or copying rows into a separate archive table — confirm the
        intended strategy before enabling this in production.
        """
        archive_commands = [
            # Move old analytics data to archive storage
            """
            ALTER TABLE analytics_event
            SET TABLESPACE archive_storage
            WHERE timestamp < NOW() - INTERVAL '1 year'
            """,
            # Move old log entries
            """
            ALTER TABLE system_log
            SET TABLESPACE archive_storage
            WHERE created_at < NOW() - INTERVAL '6 months'
            """,
        ]
        with connection.cursor() as cursor:
            for command in archive_commands:
                try:
                    cursor.execute(command)
                    self.stdout.write(f"Moved old data: {command}")
                except Exception as e:
                    self.stdout.write(f"Error moving data: {e}")

    def analyze_tablespace_usage(self):
        """Print size/usage reports: per-tablespace sizes, then tables and
        indexes with their tablespace and on-disk size."""
        analysis_queries = [
            # Tablespace sizes (user tablespaces only)
            """
            SELECT
                spcname as tablespace_name,
                pg_size_pretty(pg_tablespace_size(spcname)) as size
            FROM pg_tablespace
            WHERE spcname NOT LIKE 'pg_%'
            """,
            # Tables by tablespace.
            # NOTE(review): joining pg_class on relname alone can match
            # same-named relations in other schemas — acceptable for a
            # report limited to 'public', but confirm if schemas multiply.
            """
            SELECT
                schemaname,
                tablename,
                COALESCE(spcname, 'default') as tablespace,
                pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
            FROM pg_tables t
            LEFT JOIN pg_class c ON c.relname = t.tablename
            LEFT JOIN pg_tablespace ts ON ts.oid = c.reltablespace
            WHERE schemaname = 'public'
            ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
            """,
            # Index distribution across tablespaces
            """
            SELECT
                schemaname,
                indexname,
                tablename,
                COALESCE(spcname, 'default') as tablespace,
                pg_size_pretty(pg_relation_size(schemaname||'.'||indexname)) as size
            FROM pg_indexes i
            LEFT JOIN pg_class c ON c.relname = i.indexname
            LEFT JOIN pg_tablespace ts ON ts.oid = c.reltablespace
            WHERE schemaname = 'public'
            ORDER BY pg_relation_size(schemaname||'.'||indexname) DESC
            """,
        ]
        with connection.cursor() as cursor:
            for query in analysis_queries:
                cursor.execute(query)
                results = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description]
                self.stdout.write(f"\n{query[:50]}...")
                for row in results:
                    self.stdout.write(str(dict(zip(columns, row))))

    def rebalance_tablespaces(self):
        """Report current usage so an operator can plan a rebalance.

        Previously ``--rebalance`` crashed with AttributeError because
        this method did not exist.  Physical data movement needs
        explicit ALTER ... SET TABLESPACE statements (each takes an
        ACCESS EXCLUSIVE lock), so it is deliberately not automated
        here.
        """
        self.analyze_tablespace_usage()
        self.stdout.write(
            "Rebalancing is not automated; review the usage report above "
            "and move relations explicitly (see TablespaceManager)."
        )
class TablespaceManager:
    """Utility class for tablespace management."""

    @staticmethod
    def _validate_identifier(name):
        """Validate *name* as a (possibly schema-qualified) SQL identifier.

        Identifiers cannot be bound as query parameters, so the ALTER
        statements below must interpolate them into SQL; this check
        guards that interpolation against SQL injection.  Returns *name*
        unchanged, or raises ValueError for anything that is not a
        dotted sequence of plain identifiers.
        """
        parts = str(name).split('.')
        if not all(part.isidentifier() for part in parts):
            raise ValueError(f"Invalid SQL identifier: {name!r}")
        return name

    @staticmethod
    def get_tablespace_info():
        """Return one dict per tablespace: name, location, pretty size,
        and raw byte size, largest first."""
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT
                    spcname,
                    pg_tablespace_location(oid) as location,
                    pg_size_pretty(pg_tablespace_size(spcname)) as size,
                    pg_tablespace_size(spcname) as size_bytes
                FROM pg_tablespace
                ORDER BY pg_tablespace_size(spcname) DESC
            """)
            columns = [desc[0] for desc in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]

    @staticmethod
    def get_table_tablespace_mapping():
        """Return the table -> tablespace mapping for the public schema.

        ``reltablespace = 0`` means the relation lives in the database's
        default tablespace, reported here as 'default'.
        """
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT
                    n.nspname as schema_name,
                    c.relname as table_name,
                    CASE
                        WHEN c.reltablespace = 0 THEN 'default'
                        ELSE ts.spcname
                    END as tablespace_name,
                    pg_size_pretty(pg_total_relation_size(c.oid)) as total_size,
                    pg_total_relation_size(c.oid) as size_bytes
                FROM pg_class c
                JOIN pg_namespace n ON n.oid = c.relnamespace
                LEFT JOIN pg_tablespace ts ON ts.oid = c.reltablespace
                WHERE c.relkind = 'r'
                AND n.nspname = 'public'
                ORDER BY pg_total_relation_size(c.oid) DESC
            """)
            columns = [desc[0] for desc in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]

    @staticmethod
    def move_table_to_tablespace(table_name, tablespace_name):
        """Move a table to a different tablespace.

        Raises ValueError if either argument is not a safe SQL
        identifier (previously these were interpolated unchecked —
        an injection risk).  SET TABLESPACE rewrites the table under an
        ACCESS EXCLUSIVE lock.
        """
        TablespaceManager._validate_identifier(table_name)
        TablespaceManager._validate_identifier(tablespace_name)
        with connection.cursor() as cursor:
            cursor.execute(
                f"ALTER TABLE {table_name} SET TABLESPACE {tablespace_name}"
            )

    @staticmethod
    def move_index_to_tablespace(index_name, tablespace_name):
        """Move an index to a different tablespace.

        Raises ValueError if either argument is not a safe SQL
        identifier.
        """
        TablespaceManager._validate_identifier(index_name)
        TablespaceManager._validate_identifier(tablespace_name)
        with connection.cursor() as cursor:
            cursor.execute(
                f"ALTER INDEX {index_name} SET TABLESPACE {tablespace_name}"
            )
class TablespaceOptimizer:
    """Optimize database performance using tablespaces"""

    @staticmethod
    def optimize_for_read_heavy_workload():
        """Optimize tablespace layout for read-heavy workloads.

        Moves a hard-coded set of tables/indexes between storage tiers.
        Each failure is caught and printed so one missing relation does
        not abort the rest of the plan.
        """
        # Names are hard-coded and trusted, so the f-string SQL below is
        # not an injection risk.
        optimization_plan = {
            # Move frequently queried tables to fast storage
            'fast_storage_tables': [
                'auth_user',
                'user_profile',
                'product_catalog',
                'active_sessions',
            ],
            # Move all indexes to fastest storage
            'index_storage_indexes': [
                'idx_user_email',
                'idx_product_category',
                'idx_session_user',
                'idx_order_date',
            ],
            # Move large, infrequently accessed data to archive
            'archive_storage_tables': [
                'audit_log',
                'old_analytics_data',
                'backup_data',
            ],
        }
        # NOTE(review): each SET TABLESPACE rewrites the relation under an
        # ACCESS EXCLUSIVE lock — schedule in a maintenance window.
        with connection.cursor() as cursor:
            # Move tables to fast storage
            for table in optimization_plan['fast_storage_tables']:
                try:
                    cursor.execute(f"ALTER TABLE {table} SET TABLESPACE fast_storage")
                    print(f"Moved {table} to fast_storage")
                except Exception as e:
                    print(f"Error moving {table}: {e}")
            # Move indexes to index storage
            for index in optimization_plan['index_storage_indexes']:
                try:
                    cursor.execute(f"ALTER INDEX {index} SET TABLESPACE index_storage")
                    print(f"Moved {index} to index_storage")
                except Exception as e:
                    print(f"Error moving {index}: {e}")
            # Move archive tables
            for table in optimization_plan['archive_storage_tables']:
                try:
                    cursor.execute(f"ALTER TABLE {table} SET TABLESPACE archive_storage")
                    print(f"Moved {table} to archive_storage")
                except Exception as e:
                    print(f"Error moving {table}: {e}")

    @staticmethod
    def create_partitioned_table_with_tablespaces():
        """Create partitioned table with different tablespaces per partition.

        NOTE(review): the partition bounds use expressions
        (DATE_TRUNC of CURRENT_DATE), which are evaluated once at
        creation time and require PostgreSQL 12+ — confirm the target
        server version before relying on this.
        """
        partition_sql = """
        -- Create parent table
        CREATE TABLE analytics_events_partitioned (
            id SERIAL,
            event_date DATE NOT NULL,
            event_type VARCHAR(50),
            user_id INTEGER,
            data JSONB
        ) PARTITION BY RANGE (event_date);
        -- Create partitions for different time periods on different tablespaces
        -- Current month on fast storage
        CREATE TABLE analytics_events_current PARTITION OF analytics_events_partitioned
            FOR VALUES FROM (DATE_TRUNC('month', CURRENT_DATE))
            TO (DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 month')
            TABLESPACE fast_storage;
        -- Previous month on standard storage
        CREATE TABLE analytics_events_previous PARTITION OF analytics_events_partitioned
            FOR VALUES FROM (DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month')
            TO (DATE_TRUNC('month', CURRENT_DATE))
            TABLESPACE pg_default;
        -- Older data on archive storage
        CREATE TABLE analytics_events_archive PARTITION OF analytics_events_partitioned
            FOR VALUES FROM ('2020-01-01')
            TO (DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month')
            TABLESPACE archive_storage;
        """
        with connection.cursor() as cursor:
            cursor.execute(partition_sql)

    @staticmethod
    def implement_hot_cold_storage_strategy():
        """Implement hot/cold storage strategy.

        NOTE(review): ALTER TABLE ... SET TABLESPACE does not accept a
        WHERE clause, so every statement below fails with a syntax
        error, which the except block swallows and prints.  Row-level
        hot/cold placement needs partitioning (see
        create_partitioned_table_with_tablespaces) or explicit row
        copies — confirm intended behavior before use.
        """
        # Define hot and cold data criteria
        hot_data_queries = [
            # Recent user activity
            """
            ALTER TABLE user_activity
            SET TABLESPACE fast_storage
            WHERE last_activity > NOW() - INTERVAL '30 days'
            """,
            # Active products
            """
            ALTER TABLE products
            SET TABLESPACE fast_storage
            WHERE is_active = true AND stock_quantity > 0
            """,
        ]
        cold_data_queries = [
            # Old analytics data
            """
            ALTER TABLE analytics_events
            SET TABLESPACE archive_storage
            WHERE event_date < NOW() - INTERVAL '6 months'
            """,
            # Inactive user data
            """
            ALTER TABLE user_profiles
            SET TABLESPACE archive_storage
            WHERE user_id IN (
                SELECT id FROM auth_user
                WHERE last_login < NOW() - INTERVAL '1 year'
            )
            """,
        ]
        with connection.cursor() as cursor:
            for query in hot_data_queries + cold_data_queries:
                try:
                    cursor.execute(query)
                except Exception as e:
                    print(f"Error executing query: {e}")
# Automated tablespace management
class AutomatedTablespaceManager:
    """Automated tablespace management based on usage patterns"""

    @staticmethod
    def analyze_and_recommend():
        """Analyze usage patterns and recommend tablespace changes.

        Returns a dict with three recommendation lists:
        ``move_to_fast_storage``, ``move_to_archive_storage`` and
        ``optimize_indexes``; each entry records the relation, the
        reason, and its pretty-printed size.
        """
        analysis_queries = {
            'table_access_frequency': """
                SELECT
                    schemaname,
                    relname as table_name,
                    seq_scan + idx_scan as total_scans,
                    n_tup_ins + n_tup_upd + n_tup_del as total_modifications,
                    pg_size_pretty(pg_total_relation_size(relid)) as size
                FROM pg_stat_user_tables
                ORDER BY (seq_scan + idx_scan) DESC
            """,
            'index_usage': """
                SELECT
                    schemaname,
                    relname as table_name,
                    indexrelname as index_name,
                    idx_scan,
                    idx_tup_read,
                    idx_tup_fetch,
                    pg_size_pretty(pg_relation_size(indexrelid)) as size
                FROM pg_stat_user_indexes
                ORDER BY idx_scan DESC
            """,
            'table_io_stats': """
                SELECT
                    schemaname,
                    relname as table_name,
                    heap_blks_read,
                    heap_blks_hit,
                    CASE
                        WHEN heap_blks_read + heap_blks_hit = 0 THEN 0
                        ELSE ROUND(heap_blks_hit::numeric / (heap_blks_read + heap_blks_hit) * 100, 2)
                    END as cache_hit_ratio
                FROM pg_statio_user_tables
                ORDER BY heap_blks_read DESC
            """,
        }
        recommendations = {
            'move_to_fast_storage': [],
            'move_to_archive_storage': [],
            'optimize_indexes': [],
        }
        with connection.cursor() as cursor:
            # Classify tables by scan/modification counts.
            cursor.execute(analysis_queries['table_access_frequency'])
            for schema, name, scans, mods, size in cursor.fetchall():
                if scans > 10000:
                    # Hot table: candidate for the SSD tier.
                    recommendations['move_to_fast_storage'].append({
                        'table': f"{schema}.{name}",
                        'reason': f'High access frequency: {scans} scans',
                        'size': size,
                    })
                elif scans < 100 and mods < 10:
                    # Cold, rarely modified table: candidate for archive.
                    recommendations['move_to_archive_storage'].append({
                        'table': f"{schema}.{name}",
                        'reason': f'Low access: {scans} scans, {mods} modifications',
                        'size': size,
                    })
            # Flag heavily used indexes for the dedicated index tier.
            cursor.execute(analysis_queries['index_usage'])
            for schema, table, index, scans, _reads, _fetches, size in cursor.fetchall():
                if scans > 5000:
                    recommendations['optimize_indexes'].append({
                        'index': f"{schema}.{index}",
                        'table': f"{schema}.{table}",
                        'reason': f'High usage: {scans} scans',
                        'size': size,
                    })
        return recommendations

    @staticmethod
    def apply_recommendations(recommendations, dry_run=True):
        """Apply tablespace recommendations.

        With ``dry_run=True`` (the default) the generated ALTER
        statements are only printed; otherwise each is executed and
        failures are logged individually.  The full statement list is
        returned either way.
        """
        commands = [
            f"ALTER TABLE {rec['table']} SET TABLESPACE fast_storage;"
            for rec in recommendations['move_to_fast_storage']
        ]
        commands += [
            f"ALTER TABLE {rec['table']} SET TABLESPACE archive_storage;"
            for rec in recommendations['move_to_archive_storage']
        ]
        commands += [
            f"ALTER INDEX {rec['index']} SET TABLESPACE index_storage;"
            for rec in recommendations['optimize_indexes']
        ]
        if dry_run:
            print("DRY RUN - Commands that would be executed:")
            for statement in commands:
                print(f"  {statement}")
        else:
            with connection.cursor() as cursor:
                for statement in commands:
                    try:
                        cursor.execute(statement)
                        print(f"Executed: {statement}")
                    except Exception as exc:
                        print(f"Error executing {statement}: {exc}")
        return commands
# Monitoring and alerting
class TablespaceMonitor:
    """Monitor tablespace usage and performance"""

    @staticmethod
    def check_tablespace_usage():
        """Check tablespace disk usage.

        Returns one dict per user-defined tablespace with its name,
        filesystem location, and size (pretty-printed and in bytes).
        """
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT
                    spcname,
                    pg_tablespace_location(oid) as location,
                    pg_size_pretty(pg_tablespace_size(spcname)) as size,
                    pg_tablespace_size(spcname) as size_bytes
                FROM pg_tablespace
                WHERE spcname NOT LIKE 'pg_%'
            """)
            rows = cursor.fetchall()
        # Free-space checks at each location would need an OS-level probe;
        # only database-reported sizes are returned here.
        keys = ('tablespace', 'location', 'size', 'size_bytes')
        return [dict(zip(keys, row)) for row in rows]

    @staticmethod
    def get_performance_metrics():
        """Get performance metrics by tablespace.

        Aggregates per-tablespace table counts, total size, average scan
        counts, and average cache-hit ratio for user tablespaces.
        """
        metrics_sql = """
                SELECT
                    ts.spcname,
                    COUNT(c.relname) as table_count,
                    SUM(pg_total_relation_size(c.oid)) as total_size,
                    AVG(st.seq_scan + st.idx_scan) as avg_scans,
                    AVG(CASE
                        WHEN stio.heap_blks_read + stio.heap_blks_hit = 0 THEN 0
                        ELSE stio.heap_blks_hit::float / (stio.heap_blks_read + stio.heap_blks_hit)
                    END) as avg_cache_hit_ratio
                FROM pg_tablespace ts
                LEFT JOIN pg_class c ON c.reltablespace = ts.oid
                LEFT JOIN pg_stat_user_tables st ON st.relid = c.oid
                LEFT JOIN pg_statio_user_tables stio ON stio.relid = c.oid
                WHERE ts.spcname NOT LIKE 'pg_%'
                GROUP BY ts.spcname
        """
        with connection.cursor() as cursor:
            cursor.execute(metrics_sql)
            names = [col[0] for col in cursor.description]
            return [dict(zip(names, record)) for record in cursor.fetchall()]
Tablespaces provide powerful capabilities for optimizing database performance through strategic data placement. While primarily useful for large-scale applications with significant storage requirements, understanding tablespace concepts helps in designing efficient database architectures.
Multiple Databases
Django supports using multiple databases in a single application, enabling you to separate different types of data, implement read/write splitting, or integrate with legacy systems. Understanding how to configure and use multiple databases effectively is crucial for scalable applications.
Composite Primary Keys
Composite primary keys (also called compound keys) consist of multiple columns that together uniquely identify a record. While Django traditionally uses single-column auto-incrementing primary keys, Django 5.2+ provides built-in support for composite primary keys via `models.CompositePrimaryKey`.