Models and Databases

Tablespaces

Tablespaces provide a way to define locations in the file system where database objects can be stored. While primarily a PostgreSQL and Oracle feature, understanding tablespaces helps optimize database performance by strategically placing data on different storage devices.

Tablespaces

Tablespaces provide a way to define locations in the file system where database objects can be stored. While primarily a PostgreSQL and Oracle feature, understanding tablespaces helps optimize database performance by strategically placing data on different storage devices.

Understanding Tablespaces

What Are Tablespaces?

# Tablespaces allow you to:
# 1. Store large tables on faster storage (SSD)
# 2. Separate indexes from table data
# 3. Distribute I/O load across multiple disks
# 4. Implement storage tiering strategies
# 5. Manage disk space more effectively

# PostgreSQL tablespace example in Django settings
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'myapp_db',
        'USER': 'postgres',
        'PASSWORD': 'password',
        'HOST': 'localhost',
        'PORT': '5432',
        'OPTIONS': {
            'options': '-c default_tablespace=fast_storage'
        },
    }
}

# Tablespace configuration in PostgreSQL
"""
-- Create tablespaces (run as PostgreSQL superuser)
CREATE TABLESPACE fast_storage LOCATION '/mnt/ssd/postgresql';
CREATE TABLESPACE archive_storage LOCATION '/mnt/hdd/postgresql';
CREATE TABLESPACE index_storage LOCATION '/mnt/nvme/postgresql';
"""

Model-Level Tablespace Configuration

# models.py
from django.db import models

class HighVolumeData(models.Model):
    """Model with tablespace configuration for high-volume data"""
    
    timestamp = models.DateTimeField(auto_now_add=True)
    event_type = models.CharField(max_length=50)
    user_id = models.IntegerField()
    data = models.JSONField()
    
    class Meta:
        # Store table in fast storage tablespace
        db_tablespace = 'fast_storage'
        
        # Indexes can use different tablespace
        indexes = [
            models.Index(
                fields=['timestamp', 'event_type'],
                name='idx_timestamp_event',
                db_tablespace='index_storage'
            ),
            models.Index(
                fields=['user_id'],
                name='idx_user_id',
                db_tablespace='index_storage'
            ),
        ]

class ArchiveData(models.Model):
    """Model for archived data using slower storage"""
    
    original_id = models.IntegerField()
    archived_at = models.DateTimeField(auto_now_add=True)
    data_snapshot = models.JSONField()
    
    class Meta:
        # Store in cheaper, slower storage
        db_tablespace = 'archive_storage'
        
        indexes = [
            models.Index(
                fields=['original_id'],
                name='idx_archive_original_id',
                db_tablespace='archive_storage'  # Keep indexes with data
            ),
        ]

class UserProfile(models.Model):
    """User profile with strategic tablespace usage"""
    
    user = models.OneToOneField('auth.User', on_delete=models.CASCADE)
    avatar = models.ImageField(upload_to='avatars/')
    bio = models.TextField()
    preferences = models.JSONField(default=dict)
    
    # Frequently accessed data
    last_login_ip = models.GenericIPAddressField(null=True)
    login_count = models.PositiveIntegerField(default=0)
    
    class Meta:
        # Store user profiles in fast storage
        db_tablespace = 'fast_storage'
        
        indexes = [
            # Frequently queried index on fast storage
            models.Index(
                fields=['user'],
                name='idx_profile_user',
                db_tablespace='fast_storage'
            ),
        ]

class LargeDocument(models.Model):
    """Model for large documents with content separation"""
    
    title = models.CharField(max_length=200)
    author = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    created_at = models.DateTimeField(auto_now_add=True)
    
    # Large content field - could be stored separately
    content = models.TextField()
    
    # Metadata fields
    file_size = models.PositiveIntegerField()
    mime_type = models.CharField(max_length=100)
    
    class Meta:
        # Main table in default tablespace
        # Large content could be moved to separate tablespace via partitioning
        
        indexes = [
            # Search index on fast storage
            models.Index(
                fields=['title'],
                name='idx_document_title',
                db_tablespace='index_storage'
            ),
            # Author index on fast storage
            models.Index(
                fields=['author', 'created_at'],
                name='idx_document_author_date',
                db_tablespace='index_storage'
            ),
        ]

Tablespace Management Strategies

Storage Tiering Implementation

# management/commands/manage_tablespaces.py
from django.core.management.base import BaseCommand
from django.db import connection

class Command(BaseCommand):
    """Management command for tablespace operations"""
    
    help = 'Manage database tablespaces'
    
    def add_arguments(self, parser):
        parser.add_argument('--create-tablespaces', action='store_true')
        parser.add_argument('--move-old-data', action='store_true')
        parser.add_argument('--analyze-usage', action='store_true')
        parser.add_argument('--rebalance', action='store_true')
    
    def handle(self, *args, **options):
        if options['create_tablespaces']:
            self.create_tablespaces()
        
        if options['move_old_data']:
            self.move_old_data_to_archive()
        
        if options['analyze_usage']:
            self.analyze_tablespace_usage()
        
        if options['rebalance']:
            self.rebalance_tablespaces()
    
    def create_tablespaces(self):
        """Create tablespaces for different storage tiers"""
        
        tablespace_commands = [
            # Fast SSD storage for frequently accessed data
            "CREATE TABLESPACE IF NOT EXISTS fast_storage LOCATION '/mnt/ssd/postgresql'",
            
            # NVMe storage for indexes
            "CREATE TABLESPACE IF NOT EXISTS index_storage LOCATION '/mnt/nvme/postgresql'",
            
            # Archive storage for old data
            "CREATE TABLESPACE IF NOT EXISTS archive_storage LOCATION '/mnt/hdd/postgresql'",
            
            # Temporary tablespace for large operations
            "CREATE TABLESPACE IF NOT EXISTS temp_storage LOCATION '/mnt/temp/postgresql'",
        ]
        
        with connection.cursor() as cursor:
            for command in tablespace_commands:
                try:
                    cursor.execute(command)
                    self.stdout.write(f"Executed: {command}")
                except Exception as e:
                    self.stdout.write(f"Error: {e}")
    
    def move_old_data_to_archive(self):
        """Move old data to archive tablespace"""
        
        # Move old analytics data to archive storage
        archive_commands = [
            """
            ALTER TABLE analytics_event 
            SET TABLESPACE archive_storage
            WHERE timestamp < NOW() - INTERVAL '1 year'
            """,
            
            # Move old log entries
            """
            ALTER TABLE system_log 
            SET TABLESPACE archive_storage
            WHERE created_at < NOW() - INTERVAL '6 months'
            """,
        ]
        
        with connection.cursor() as cursor:
            for command in archive_commands:
                try:
                    cursor.execute(command)
                    self.stdout.write(f"Moved old data: {command}")
                except Exception as e:
                    self.stdout.write(f"Error moving data: {e}")
    
    def analyze_tablespace_usage(self):
        """Analyze tablespace usage and performance"""
        
        analysis_queries = [
            # Tablespace sizes
            """
            SELECT 
                spcname as tablespace_name,
                pg_size_pretty(pg_tablespace_size(spcname)) as size
            FROM pg_tablespace
            WHERE spcname NOT LIKE 'pg_%'
            """,
            
            # Tables by tablespace
            """
            SELECT 
                schemaname,
                tablename,
                COALESCE(spcname, 'default') as tablespace,
                pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
            FROM pg_tables t
            LEFT JOIN pg_class c ON c.relname = t.tablename
            LEFT JOIN pg_tablespace ts ON ts.oid = c.reltablespace
            WHERE schemaname = 'public'
            ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
            """,
            
            # Index distribution
            """
            SELECT 
                schemaname,
                indexname,
                tablename,
                COALESCE(spcname, 'default') as tablespace,
                pg_size_pretty(pg_relation_size(schemaname||'.'||indexname)) as size
            FROM pg_indexes i
            LEFT JOIN pg_class c ON c.relname = i.indexname
            LEFT JOIN pg_tablespace ts ON ts.oid = c.reltablespace
            WHERE schemaname = 'public'
            ORDER BY pg_relation_size(schemaname||'.'||indexname) DESC
            """,
        ]
        
        with connection.cursor() as cursor:
            for query in analysis_queries:
                cursor.execute(query)
                results = cursor.fetchall()
                columns = [desc[0] for desc in cursor.description]
                
                self.stdout.write(f"\n{query[:50]}...")
                for row in results:
                    self.stdout.write(str(dict(zip(columns, row))))

class TablespaceManager:
    """Utility class for tablespace management"""
    
    @staticmethod
    def get_tablespace_info():
        """Get information about all tablespaces"""
        
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT 
                    spcname,
                    pg_tablespace_location(oid) as location,
                    pg_size_pretty(pg_tablespace_size(spcname)) as size,
                    pg_tablespace_size(spcname) as size_bytes
                FROM pg_tablespace
                ORDER BY pg_tablespace_size(spcname) DESC
            """)
            
            columns = [desc[0] for desc in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]
    
    @staticmethod
    def get_table_tablespace_mapping():
        """Get mapping of tables to tablespaces"""
        
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT 
                    n.nspname as schema_name,
                    c.relname as table_name,
                    CASE 
                        WHEN c.reltablespace = 0 THEN 'default'
                        ELSE ts.spcname 
                    END as tablespace_name,
                    pg_size_pretty(pg_total_relation_size(c.oid)) as total_size,
                    pg_total_relation_size(c.oid) as size_bytes
                FROM pg_class c
                JOIN pg_namespace n ON n.oid = c.relnamespace
                LEFT JOIN pg_tablespace ts ON ts.oid = c.reltablespace
                WHERE c.relkind = 'r'
                    AND n.nspname = 'public'
                ORDER BY pg_total_relation_size(c.oid) DESC
            """)
            
            columns = [desc[0] for desc in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]
    
    @staticmethod
    def move_table_to_tablespace(table_name, tablespace_name):
        """Move a table to a different tablespace"""
        
        with connection.cursor() as cursor:
            cursor.execute(
                f"ALTER TABLE {table_name} SET TABLESPACE {tablespace_name}"
            )
    
    @staticmethod
    def move_index_to_tablespace(index_name, tablespace_name):
        """Move an index to a different tablespace"""
        
        with connection.cursor() as cursor:
            cursor.execute(
                f"ALTER INDEX {index_name} SET TABLESPACE {tablespace_name}"
            )

Performance Optimization with Tablespaces

class TablespaceOptimizer:
    """Optimize database performance using tablespaces"""
    
    @staticmethod
    def optimize_for_read_heavy_workload():
        """Optimize tablespace layout for read-heavy workloads"""
        
        optimization_plan = {
            # Move frequently queried tables to fast storage
            'fast_storage_tables': [
                'auth_user',
                'user_profile',
                'product_catalog',
                'active_sessions',
            ],
            
            # Move all indexes to fastest storage
            'index_storage_indexes': [
                'idx_user_email',
                'idx_product_category',
                'idx_session_user',
                'idx_order_date',
            ],
            
            # Move large, infrequently accessed data to archive
            'archive_storage_tables': [
                'audit_log',
                'old_analytics_data',
                'backup_data',
            ],
        }
        
        with connection.cursor() as cursor:
            # Move tables to fast storage
            for table in optimization_plan['fast_storage_tables']:
                try:
                    cursor.execute(f"ALTER TABLE {table} SET TABLESPACE fast_storage")
                    print(f"Moved {table} to fast_storage")
                except Exception as e:
                    print(f"Error moving {table}: {e}")
            
            # Move indexes to index storage
            for index in optimization_plan['index_storage_indexes']:
                try:
                    cursor.execute(f"ALTER INDEX {index} SET TABLESPACE index_storage")
                    print(f"Moved {index} to index_storage")
                except Exception as e:
                    print(f"Error moving {index}: {e}")
            
            # Move archive tables
            for table in optimization_plan['archive_storage_tables']:
                try:
                    cursor.execute(f"ALTER TABLE {table} SET TABLESPACE archive_storage")
                    print(f"Moved {table} to archive_storage")
                except Exception as e:
                    print(f"Error moving {table}: {e}")
    
    @staticmethod
    def create_partitioned_table_with_tablespaces():
        """Create partitioned table with different tablespaces per partition"""
        
        partition_sql = """
        -- Create parent table
        CREATE TABLE analytics_events_partitioned (
            id SERIAL,
            event_date DATE NOT NULL,
            event_type VARCHAR(50),
            user_id INTEGER,
            data JSONB
        ) PARTITION BY RANGE (event_date);
        
        -- Create partitions for different time periods on different tablespaces
        
        -- Current month on fast storage
        CREATE TABLE analytics_events_current PARTITION OF analytics_events_partitioned
        FOR VALUES FROM (DATE_TRUNC('month', CURRENT_DATE)) 
        TO (DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 month')
        TABLESPACE fast_storage;
        
        -- Previous month on standard storage
        CREATE TABLE analytics_events_previous PARTITION OF analytics_events_partitioned
        FOR VALUES FROM (DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month') 
        TO (DATE_TRUNC('month', CURRENT_DATE))
        TABLESPACE pg_default;
        
        -- Older data on archive storage
        CREATE TABLE analytics_events_archive PARTITION OF analytics_events_partitioned
        FOR VALUES FROM ('2020-01-01') 
        TO (DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month')
        TABLESPACE archive_storage;
        """
        
        with connection.cursor() as cursor:
            cursor.execute(partition_sql)
    
    @staticmethod
    def implement_hot_cold_storage_strategy():
        """Implement hot/cold storage strategy"""
        
        # Define hot and cold data criteria
        hot_data_queries = [
            # Recent user activity
            """
            ALTER TABLE user_activity 
            SET TABLESPACE fast_storage
            WHERE last_activity > NOW() - INTERVAL '30 days'
            """,
            
            # Active products
            """
            ALTER TABLE products 
            SET TABLESPACE fast_storage
            WHERE is_active = true AND stock_quantity > 0
            """,
        ]
        
        cold_data_queries = [
            # Old analytics data
            """
            ALTER TABLE analytics_events 
            SET TABLESPACE archive_storage
            WHERE event_date < NOW() - INTERVAL '6 months'
            """,
            
            # Inactive user data
            """
            ALTER TABLE user_profiles 
            SET TABLESPACE archive_storage
            WHERE user_id IN (
                SELECT id FROM auth_user 
                WHERE last_login < NOW() - INTERVAL '1 year'
            )
            """,
        ]
        
        with connection.cursor() as cursor:
            for query in hot_data_queries + cold_data_queries:
                try:
                    cursor.execute(query)
                except Exception as e:
                    print(f"Error executing query: {e}")

# Automated tablespace management
class AutomatedTablespaceManager:
    """Automated tablespace management based on usage patterns"""
    
    @staticmethod
    def analyze_and_recommend():
        """Analyze usage patterns and recommend tablespace changes"""
        
        analysis_queries = {
            'table_access_frequency': """
                SELECT 
                    schemaname,
                    relname as table_name,
                    seq_scan + idx_scan as total_scans,
                    n_tup_ins + n_tup_upd + n_tup_del as total_modifications,
                    pg_size_pretty(pg_total_relation_size(relid)) as size
                FROM pg_stat_user_tables
                ORDER BY (seq_scan + idx_scan) DESC
            """,
            
            'index_usage': """
                SELECT 
                    schemaname,
                    relname as table_name,
                    indexrelname as index_name,
                    idx_scan,
                    idx_tup_read,
                    idx_tup_fetch,
                    pg_size_pretty(pg_relation_size(indexrelid)) as size
                FROM pg_stat_user_indexes
                ORDER BY idx_scan DESC
            """,
            
            'table_io_stats': """
                SELECT 
                    schemaname,
                    relname as table_name,
                    heap_blks_read,
                    heap_blks_hit,
                    CASE 
                        WHEN heap_blks_read + heap_blks_hit = 0 THEN 0
                        ELSE ROUND(heap_blks_hit::numeric / (heap_blks_read + heap_blks_hit) * 100, 2)
                    END as cache_hit_ratio
                FROM pg_statio_user_tables
                ORDER BY heap_blks_read DESC
            """,
        }
        
        recommendations = {
            'move_to_fast_storage': [],
            'move_to_archive_storage': [],
            'optimize_indexes': [],
        }
        
        with connection.cursor() as cursor:
            # Analyze table access patterns
            cursor.execute(analysis_queries['table_access_frequency'])
            table_stats = cursor.fetchall()
            
            for row in table_stats:
                schema, table_name, total_scans, modifications, size = row
                
                # High access tables should be on fast storage
                if total_scans > 10000:
                    recommendations['move_to_fast_storage'].append({
                        'table': f"{schema}.{table_name}",
                        'reason': f'High access frequency: {total_scans} scans',
                        'size': size
                    })
                
                # Low access tables can go to archive
                elif total_scans < 100 and modifications < 10:
                    recommendations['move_to_archive_storage'].append({
                        'table': f"{schema}.{table_name}",
                        'reason': f'Low access: {total_scans} scans, {modifications} modifications',
                        'size': size
                    })
            
            # Analyze index usage
            cursor.execute(analysis_queries['index_usage'])
            index_stats = cursor.fetchall()
            
            for row in index_stats:
                schema, table_name, index_name, idx_scan, tup_read, tup_fetch, size = row
                
                # Frequently used indexes should be on fast storage
                if idx_scan > 5000:
                    recommendations['optimize_indexes'].append({
                        'index': f"{schema}.{index_name}",
                        'table': f"{schema}.{table_name}",
                        'reason': f'High usage: {idx_scan} scans',
                        'size': size
                    })
        
        return recommendations
    
    @staticmethod
    def apply_recommendations(recommendations, dry_run=True):
        """Apply tablespace recommendations"""
        
        commands = []
        
        # Generate commands for fast storage moves
        for item in recommendations['move_to_fast_storage']:
            commands.append(f"ALTER TABLE {item['table']} SET TABLESPACE fast_storage;")
        
        # Generate commands for archive storage moves
        for item in recommendations['move_to_archive_storage']:
            commands.append(f"ALTER TABLE {item['table']} SET TABLESPACE archive_storage;")
        
        # Generate commands for index optimization
        for item in recommendations['optimize_indexes']:
            commands.append(f"ALTER INDEX {item['index']} SET TABLESPACE index_storage;")
        
        if dry_run:
            print("DRY RUN - Commands that would be executed:")
            for cmd in commands:
                print(f"  {cmd}")
        else:
            with connection.cursor() as cursor:
                for cmd in commands:
                    try:
                        cursor.execute(cmd)
                        print(f"Executed: {cmd}")
                    except Exception as e:
                        print(f"Error executing {cmd}: {e}")
        
        return commands

# Monitoring and alerting
class TablespaceMonitor:
    """Monitor tablespace usage and performance"""
    
    @staticmethod
    def check_tablespace_usage():
        """Check tablespace disk usage"""
        
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT 
                    spcname,
                    pg_tablespace_location(oid) as location,
                    pg_size_pretty(pg_tablespace_size(spcname)) as size,
                    pg_tablespace_size(spcname) as size_bytes
                FROM pg_tablespace
                WHERE spcname NOT LIKE 'pg_%'
            """)
            
            usage_info = []
            for row in cursor.fetchall():
                spcname, location, size, size_bytes = row
                
                # Check available disk space (would need OS-level check)
                usage_info.append({
                    'tablespace': spcname,
                    'location': location,
                    'size': size,
                    'size_bytes': size_bytes,
                })
            
            return usage_info
    
    @staticmethod
    def get_performance_metrics():
        """Get performance metrics by tablespace"""
        
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT 
                    ts.spcname,
                    COUNT(c.relname) as table_count,
                    SUM(pg_total_relation_size(c.oid)) as total_size,
                    AVG(st.seq_scan + st.idx_scan) as avg_scans,
                    AVG(CASE 
                        WHEN stio.heap_blks_read + stio.heap_blks_hit = 0 THEN 0
                        ELSE stio.heap_blks_hit::float / (stio.heap_blks_read + stio.heap_blks_hit)
                    END) as avg_cache_hit_ratio
                FROM pg_tablespace ts
                LEFT JOIN pg_class c ON c.reltablespace = ts.oid
                LEFT JOIN pg_stat_user_tables st ON st.relid = c.oid
                LEFT JOIN pg_statio_user_tables stio ON stio.relid = c.oid
                WHERE ts.spcname NOT LIKE 'pg_%'
                GROUP BY ts.spcname
            """)
            
            columns = [desc[0] for desc in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]

Tablespaces provide powerful capabilities for optimizing database performance through strategic data placement. While primarily useful for large-scale applications with significant storage requirements, understanding tablespace concepts helps in designing efficient database architectures.