Working with Files

Using Cloud Storage Providers

Cloud storage providers offer scalable, reliable, and cost-effective solutions for handling files in production Django applications. This chapter covers integration with major cloud storage services and best practices for cloud-based file management.

AWS S3 Integration

Installation and Setup

pip install django-storages[boto3]

Configuration

# settings.py
import os

# AWS S3 Configuration
# Credentials and bucket identifiers come from the environment so that
# secrets never live in source control.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
AWS_STORAGE_BUCKET_NAME = os.environ.get('AWS_STORAGE_BUCKET_NAME')
# Falls back to us-east-1 when AWS_S3_REGION_NAME is unset.
AWS_S3_REGION_NAME = os.environ.get('AWS_S3_REGION_NAME', 'us-east-1')

# S3 Storage Settings
# NOTE(review): if AWS_STORAGE_BUCKET_NAME is unset this renders as
# 'None.s3.amazonaws.com' — confirm the variable is always defined in prod.
AWS_S3_CUSTOM_DOMAIN = f'{AWS_STORAGE_BUCKET_NAME}.s3.amazonaws.com'
AWS_S3_OBJECT_PARAMETERS = {
    'CacheControl': 'max-age=86400',  # clients may cache objects for one day
}
# NOTE(review): 'public-read' makes every uploaded object world-readable;
# buckets with Block Public Access / ACLs disabled will reject this — confirm.
AWS_DEFAULT_ACL = 'public-read'
# Do not silently replace an existing object uploaded under the same name.
AWS_S3_FILE_OVERWRITE = False
# Emit plain URLs without signed query parameters (objects are public anyway).
AWS_QUERYSTRING_AUTH = False

# Storage backends (dict-style STORAGES setting, Django 4.2+)
STORAGES = {
    # Default backend used for FileField/ImageField uploads.
    "default": {
        "BACKEND": "storages.backends.s3boto3.S3Boto3Storage",
        "OPTIONS": {
            "bucket_name": AWS_STORAGE_BUCKET_NAME,
            "region_name": AWS_S3_REGION_NAME,
            "access_key": AWS_ACCESS_KEY_ID,
            "secret_key": AWS_SECRET_ACCESS_KEY,
            "custom_domain": AWS_S3_CUSTOM_DOMAIN,
            "object_parameters": AWS_S3_OBJECT_PARAMETERS,
            "default_acl": AWS_DEFAULT_ACL,
            "querystring_auth": AWS_QUERYSTRING_AUTH,
        },
    },
    # Target for collectstatic; files land under the 'static/' key prefix.
    "staticfiles": {
        "BACKEND": "storages.backends.s3boto3.S3StaticStorage",
        "OPTIONS": {
            "bucket_name": AWS_STORAGE_BUCKET_NAME,
            "location": "static",
        },
    },
}

Custom S3 Storage Classes

# storage.py
from storages.backends.s3boto3 import S3Boto3Storage

class MediaStorage(S3Boto3Storage):
    """Public, non-overwriting S3 storage for media files.

    Objects live under the 'media/' prefix of the dedicated media bucket.
    """
    bucket_name = 'my-media-bucket'
    location = 'media'  # key prefix inside the bucket
    default_acl = 'public-read'  # objects are world-readable
    file_overwrite = False  # never replace an existing object with the same name

class PrivateMediaStorage(S3Boto3Storage):
    """Private S3 storage for sensitive files.

    Objects are stored with a private ACL and served through time-limited
    signed URLs rather than a public custom domain.
    """
    bucket_name = 'my-private-bucket'
    location = 'private'  # key prefix inside the bucket
    default_acl = 'private'  # not readable without a signature
    file_overwrite = False
    custom_domain = False  # signed URLs must target the bucket endpoint, not a CDN
    querystring_auth = True  # append signature query parameters to every URL
    querystring_expire = 3600  # 1 hour

class StaticStorage(S3Boto3Storage):
    """S3 storage for static assets under the 'static/' prefix."""
    bucket_name = 'my-static-bucket'
    location = 'static'  # key prefix inside the bucket
    default_acl = 'public-read'  # static assets are public by design
    
# Using in models
class Document(models.Model):
    """Example model pairing a public and a private file on one record."""
    # NOTE(review): `models` (django.db), MediaStorage and PrivateMediaStorage
    # must be imported in the real module; this snippet omits the imports.
    title = models.CharField(max_length=200)
    public_file = models.FileField(
        upload_to='documents/',
        storage=MediaStorage()  # world-readable media bucket
    )
    private_file = models.FileField(
        upload_to='private/',
        storage=PrivateMediaStorage()  # served only via signed URLs
    )

S3 Advanced Configuration

# settings.py

# CloudFront CDN: generated URLs use the CDN hostname instead of the raw
# S3 endpoint.
AWS_S3_CUSTOM_DOMAIN = 'cdn.example.com'
AWS_S3_URL_PROTOCOL = 'https:'

# Security settings: force HTTPS for generated URLs and the S3 connection.
AWS_S3_SECURE_URLS = True
AWS_S3_USE_SSL = True

# Performance settings
AWS_S3_MAX_MEMORY_SIZE = 100 * 1024 * 1024  # buffer up to 100MB in memory
# FIX: these thresholds were written as `1024 * 25` (25KB) while the comments
# claimed 25MB; 25KB is also below S3's 5MB minimum multipart part size.
AWS_S3_TRANSFER_CONFIG = {
    'multipart_threshold': 25 * 1024 * 1024,  # switch to multipart at 25MB
    'max_concurrency': 10,                    # parallel transfer threads
    'multipart_chunksize': 25 * 1024 * 1024,  # 25MB per uploaded part
    'use_threads': True,
}

# Lifecycle management
AWS_S3_OBJECT_PARAMETERS = {
    'CacheControl': 'max-age=86400',
    # Infrequent Access: cheaper per-GB storage, but retrieval is billed.
    'StorageClass': 'STANDARD_IA',
}

Google Cloud Storage

Installation and Setup

pip install django-storages[google]

Configuration

# settings.py
import os

# Google Cloud Storage — identifiers come from the environment.
GS_BUCKET_NAME = os.environ.get('GS_BUCKET_NAME')
GS_PROJECT_ID = os.environ.get('GS_PROJECT_ID')
# NOTE(review): this is the *path* to a service-account JSON file, but the
# backend's 'credentials' option expects a google.auth credentials object —
# confirm, or load it with service_account.Credentials.from_service_account_file.
GS_CREDENTIALS = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')

# GCS Storage Settings
GS_DEFAULT_ACL = 'publicRead'  # objects readable by anyone
GS_FILE_OVERWRITE = False  # keep existing objects; name collisions get new names
GS_MAX_MEMORY_SIZE = 100 * 1024 * 1024  # 100MB

STORAGES = {
    # Default backend used for FileField/ImageField uploads.
    "default": {
        "BACKEND": "storages.backends.gcloud.GoogleCloudStorage",
        "OPTIONS": {
            "bucket_name": GS_BUCKET_NAME,
            "project_id": GS_PROJECT_ID,
            "credentials": GS_CREDENTIALS,
            "default_acl": GS_DEFAULT_ACL,
            "file_overwrite": GS_FILE_OVERWRITE,
        },
    },
}

Custom GCS Storage

# storage.py
from storages.backends.gcloud import GoogleCloudStorage

class CustomGoogleCloudStorage(GoogleCloudStorage):
    """Google Cloud Storage with an optional custom URL endpoint.

    FIX: the original hard-set ``self.custom_endpoint = None`` in
    ``__init__``, so the custom branch of ``url()`` was dead code unless a
    caller mutated the attribute after construction. The endpoint is now an
    optional keyword argument (default ``None`` preserves old behavior).
    """
    bucket_name = 'my-gcs-bucket'
    location = 'media'
    default_acl = 'publicRead'

    def __init__(self, *args, custom_endpoint=None, **kwargs):
        """Accept an optional CDN/proxy endpoint for generated URLs."""
        super().__init__(*args, **kwargs)
        self.custom_endpoint = custom_endpoint

    def url(self, name):
        """Return the custom-endpoint URL when configured, else the GCS URL."""
        if self.custom_endpoint:
            return f"{self.custom_endpoint}/{name}"
        return super().url(name)

# Service account authentication
from google.oauth2 import service_account

class AuthenticatedGCSStorage(GoogleCloudStorage):
    """GCS storage authenticated via a service-account key file.

    FIX: the key-file path was hard-coded, and an explicit ``credentials``
    kwarg passed by the caller was silently overwritten. The path is now a
    keyword argument (defaulting to the original path), and caller-supplied
    credentials take precedence.
    """

    def __init__(self, *args, credentials_path='/path/to/service-account.json',
                 **kwargs):
        """Load service-account credentials unless the caller provided some."""
        if 'credentials' not in kwargs:
            kwargs['credentials'] = (
                service_account.Credentials.from_service_account_file(
                    credentials_path
                )
            )
        super().__init__(*args, **kwargs)

Azure Blob Storage

Installation and Setup

pip install django-storages[azure]

Configuration

# settings.py
import os

# Azure Blob Storage — account credentials and container from the environment.
AZURE_ACCOUNT_NAME = os.environ.get('AZURE_ACCOUNT_NAME')
AZURE_ACCOUNT_KEY = os.environ.get('AZURE_ACCOUNT_KEY')
AZURE_CONTAINER = os.environ.get('AZURE_CONTAINER')

STORAGES = {
    # Default backend used for FileField/ImageField uploads.
    "default": {
        "BACKEND": "storages.backends.azure_storage.AzureStorage",
        "OPTIONS": {
            "account_name": AZURE_ACCOUNT_NAME,
            "account_key": AZURE_ACCOUNT_KEY,
            "azure_container": AZURE_CONTAINER,
            "expiration_secs": 3600,  # generated URLs expire after 1 hour
        },
    },
}

Custom Azure Storage

# storage.py
from storages.backends.azure_storage import AzureStorage

class CustomAzureStorage(AzureStorage):
    """Custom Azure Blob Storage rooted at the 'media' container."""
    account_name = 'mystorageaccount'
    azure_container = 'media'
    expiration_secs = 3600  # URL signature lifetime, in seconds
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Custom settings
        # NOTE(review): django-storages normally resolves settings like these
        # at class level / during __init__; assigning instance attributes
        # *after* super().__init__() may not take effect — confirm against
        # the installed django-storages version.
        self.overwrite_files = False
        self.azure_ssl = True

Multi-Cloud Storage Strategy

Hybrid Storage Configuration

# storage.py
from django.conf import settings
from storages.backends.s3boto3 import S3Boto3Storage
from storages.backends.gcloud import GoogleCloudStorage

class HybridStorage:
    """Pick a cloud provider for a file based on its extension.

    Images (and anything unrecognized) go to S3; videos go to Google
    Cloud Storage.
    """

    # Extension groups used by the routing decision.
    _VIDEO_SUFFIXES = ('.mp4', '.avi', '.mov')

    def __init__(self):
        self.s3_storage = S3Boto3Storage()
        self.gcs_storage = GoogleCloudStorage()

    def get_storage(self, filename):
        """Return the storage backend responsible for *filename*."""
        if filename.endswith(self._VIDEO_SUFFIXES):
            # Videos to Google Cloud Storage.
            return self.gcs_storage
        # Images and every other file type default to S3.
        return self.s3_storage

# Usage in models
class MediaFile(models.Model):
    """Uploaded file whose storage backend is chosen per file extension."""
    name = models.CharField(max_length=255)
    file = models.FileField(upload_to='uploads/')
    
    def save(self, *args, **kwargs):
        """Route the file to a provider via HybridStorage before saving."""
        # NOTE(review): reassigning `self.file.storage` here only matters for
        # a file that has not yet been written; if the upload handler already
        # persisted it to the default backend, this comes too late — confirm
        # the intended upload flow.
        if self.file:
            hybrid = HybridStorage()
            storage = hybrid.get_storage(self.file.name)
            self.file.storage = storage
        super().save(*args, **kwargs)

Failover Storage

# storage.py
import logging
from django.core.files.storage import Storage

logger = logging.getLogger(__name__)

class FailoverStorage(Storage):
    """Storage wrapper that transparently fails over between two providers."""

    def __init__(self, primary_storage, backup_storage):
        self.primary_storage = primary_storage
        self.backup_storage = backup_storage

    def _save(self, name, content):
        """Write via the primary provider; on any error, write to the backup."""
        try:
            return self.primary_storage._save(name, content)
        except Exception as e:
            logger.warning(f"Primary storage failed: {e}")
            logger.info("Falling back to backup storage")
            return self.backup_storage._save(name, content)

    def _open(self, name, mode='rb'):
        """Open from the primary provider, falling back to the backup."""
        try:
            return self.primary_storage._open(name, mode)
        except Exception:
            return self.backup_storage._open(name, mode)

    def exists(self, name):
        """Return True when *name* is present in either provider."""
        if self.primary_storage.exists(name):
            return True
        return self.backup_storage.exists(name)

    def delete(self, name):
        """Best-effort delete from both providers; individual errors ignored."""
        for backend in (self.primary_storage, self.backup_storage):
            try:
                backend.delete(name)
            except Exception:
                pass

Cloud Storage Optimization

Caching and CDN Integration

# settings.py

# CloudFront CDN for S3 — generated media URLs use the CDN hostname.
AWS_S3_CUSTOM_DOMAIN = 'cdn.example.com'
AWS_CLOUDFRONT_DOMAIN = 'cdn.example.com'

# Cache headers applied to every uploaded object.
AWS_S3_OBJECT_PARAMETERS = {
    'CacheControl': 'max-age=86400',
    # NOTE(review): when both are present, Cache-Control max-age takes
    # precedence over Expires, so this far-future Expires is largely inert.
    'Expires': 'Thu, 31 Dec 2099 20:00:00 GMT',
}

# Google Cloud CDN endpoint for generated URLs.
GS_CUSTOM_ENDPOINT = 'https://cdn.example.com'

# Azure CDN hostname for generated URLs.
AZURE_CUSTOM_DOMAIN = 'cdn.example.com'

Compression and Optimization

# storage.py
import gzip
import io
from django.core.files.base import ContentFile

class CompressedStorage(S3Boto3Storage):
    """S3 storage that gzip-compresses text assets before uploading.

    FIX: the original mutated ``self.object_parameters`` inside ``_save``,
    which (a) is not thread-safe and (b) left ``ContentEncoding: gzip`` set
    for every *subsequent* save, mislabeling uncompressed files. The
    Content-Encoding is now derived per object name via
    ``get_object_parameters`` instead of shared mutable state.
    """

    # Text-like file types worth compressing; already-compressed media
    # (images, video, archives) is deliberately excluded.
    COMPRESSIBLE_SUFFIXES = ('.css', '.js', '.html', '.txt', '.json')

    def _save(self, name, content):
        """Gzip compressible files, then delegate to the S3 backend."""
        if name.endswith(self.COMPRESSIBLE_SUFFIXES):
            content.seek(0)
            compressed_bytes = gzip.compress(content.read())
            return super()._save(name, ContentFile(compressed_bytes))
        return super()._save(name, content)

    def get_object_parameters(self, name):
        """Attach Content-Encoding only to objects stored compressed."""
        params = super().get_object_parameters(name)
        if name.endswith(self.COMPRESSIBLE_SUFFIXES):
            # Copy before modifying so shared defaults are never mutated.
            params = {**params, 'ContentEncoding': 'gzip'}
        return params

Image Processing Pipeline

# storage.py
from PIL import Image
import io
from django.core.files.base import ContentFile

class ImageProcessingStorage(S3Boto3Storage):
    """S3 storage that optimizes images before uploading.

    FIX: the original re-encoded *every* image as JPEG while keeping the
    original filename, so a ``.png``/``.gif``/``.webp`` object contained
    JPEG bytes (and PNG transparency was lost). Images are now re-encoded
    in their source format.
    """

    # Maximum allowed dimensions; larger images are downscaled in place.
    MAX_DIMENSIONS = (1920, 1080)
    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

    def _save(self, name, content):
        """Optimize image uploads, then delegate to the S3 backend."""
        if self._is_image(name):
            content = self._process_image(content)
        return super()._save(name, content)

    def _is_image(self, name):
        """Check whether the filename has a recognized image extension."""
        return name.lower().endswith(self.IMAGE_SUFFIXES)

    def _process_image(self, content):
        """Downscale and re-encode an image in its original format."""
        content.seek(0)
        image = Image.open(content)

        # Preserve the source encoding so the bytes match the extension.
        fmt = image.format or 'JPEG'

        # JPEG cannot store an alpha channel or palette — convert only then.
        if fmt == 'JPEG' and image.mode in ('RGBA', 'LA', 'P'):
            image = image.convert('RGB')

        # Downscale in place, preserving aspect ratio.
        # NOTE(review): thumbnail() keeps only the first frame of an
        # animated GIF — confirm animated uploads are acceptable to flatten.
        if (image.size[0] > self.MAX_DIMENSIONS[0]
                or image.size[1] > self.MAX_DIMENSIONS[1]):
            image.thumbnail(self.MAX_DIMENSIONS, Image.Resampling.LANCZOS)

        # Re-encode with per-format optimization options.
        save_kwargs = {}
        if fmt == 'JPEG':
            save_kwargs.update(quality=85, optimize=True)
        elif fmt == 'PNG':
            save_kwargs['optimize'] = True
        elif fmt == 'WEBP':
            save_kwargs['quality'] = 85

        output = io.BytesIO()
        image.save(output, format=fmt, **save_kwargs)
        output.seek(0)

        return ContentFile(output.getvalue())

Security and Access Control

Signed URLs for Private Files

# views.py
import boto3
from botocore.exceptions import ClientError
from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.http import HttpResponseRedirect, HttpResponseForbidden

@login_required
def serve_private_file(request, file_id):
    """Serve private file with signed URL.

    Redirects the authenticated owner to a short-lived S3 presigned URL
    instead of proxying the file bytes through Django.
    """
    # NOTE(review): PrivateFile is not imported in this snippet — it must
    # come from the project's models module.
    try:
        # Ownership check: only records belonging to the requesting user.
        file_obj = PrivateFile.objects.get(id=file_id, user=request.user)
        
        # Generate signed URL
        s3_client = boto3.client('s3')
        signed_url = s3_client.generate_presigned_url(
            'get_object',
            Params={
                'Bucket': settings.AWS_PRIVATE_BUCKET_NAME,
                'Key': file_obj.file.name
            },
            ExpiresIn=3600  # 1 hour
        )
        
        # The client fetches the bytes directly from S3 using the signature.
        return HttpResponseRedirect(signed_url)
        
    except (PrivateFile.DoesNotExist, ClientError):
        # Unknown/foreign record or S3 failure → deny without leaking detail.
        return HttpResponseForbidden()

Access Control with IAM Policies

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "s3:GetObject",
        "s3:PutObject",
        "s3:DeleteObject"
      ],
      "Resource": "arn:aws:s3:::my-bucket/uploads/*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "s3:ListBucket"
      ],
      "Resource": "arn:aws:s3:::my-bucket",
      "Condition": {
        "StringLike": {
          "s3:prefix": "uploads/*"
        }
      }
    }
  ]
}

Monitoring and Analytics

Cloud Storage Metrics

# monitoring.py
import boto3
from django.core.management.base import BaseCommand

class Command(BaseCommand):
    """Report the S3 bucket's average size over the last day.

    Queries CloudWatch's daily ``BucketSizeBytes`` metric and prints the
    result in GB.

    FIX: the original referenced ``datetime``, ``timedelta`` and
    ``settings`` without importing them, created an unused ``s3_client``,
    and used the deprecated naive ``datetime.utcnow()``.
    """

    def handle(self, *args, **options):
        # Local imports: the original snippet never brought these names
        # into scope; datetime.now(timezone.utc) replaces deprecated utcnow().
        from datetime import datetime, timedelta, timezone

        from django.conf import settings

        cloudwatch = boto3.client('cloudwatch')

        now = datetime.now(timezone.utc)

        # S3 storage metrics are published daily, so a 1-day window with a
        # 1-day period yields at most one datapoint.
        response = cloudwatch.get_metric_statistics(
            Namespace='AWS/S3',
            MetricName='BucketSizeBytes',
            Dimensions=[
                {'Name': 'BucketName',
                 'Value': settings.AWS_STORAGE_BUCKET_NAME},
                {'Name': 'StorageType', 'Value': 'StandardStorage'},
            ],
            StartTime=now - timedelta(days=1),
            EndTime=now,
            Period=86400,
            Statistics=['Average'],
        )

        datapoints = response['Datapoints']
        if datapoints:
            size_gb = datapoints[0]['Average'] / (1024 ** 3)
            self.stdout.write(f"Bucket size: {size_gb:.2f} GB")

Best Practices

Performance

  • Use CDN for static assets and frequently accessed files
  • Implement proper caching headers
  • Compress files when appropriate
  • Use multipart uploads for large files

Security

  • Use IAM roles and policies for access control
  • Implement signed URLs for private files
  • Validate file types and content
  • Enable versioning and backup strategies

Cost Optimization

  • Use appropriate storage classes (Standard, IA, Glacier)
  • Implement lifecycle policies
  • Monitor usage and costs
  • Clean up unused files regularly

Reliability

  • Implement failover strategies
  • Use multiple regions for critical files
  • Monitor service health and performance
  • Have backup and recovery plans

Next Steps

Now that you understand cloud storage integration, let's explore how to manage media files effectively in production environments, including optimization, security, and scaling strategies.