Django's file handling system centers around the File object, which provides a consistent interface for working with files regardless of their storage backend. Understanding the File object is crucial for advanced file manipulation and custom storage implementations.
When users upload files through forms, Django creates UploadedFile objects:
from django.http import HttpResponse
from django.views.decorators.csrf import csrf_exempt
@csrf_exempt
def upload_file(request):
    """Handle a single file upload and print its basic properties.

    The upload is expected in the ``file`` field of a multipart POST body.

    NOTE(review): ``csrf_exempt`` disables CSRF protection for this view;
    confirm that is intended outside of a demo.

    Args:
        request: The incoming HttpRequest.

    Returns:
        HttpResponse: 200 on success, 400 when the request is not a POST or
        carries no ``file`` upload.
    """
    # .get() avoids a key error (HTTP 500) when the form used a different
    # field name. Returning an explicit 400 also covers non-POST requests,
    # which would otherwise fall through and return None.
    uploaded_file = request.FILES.get('file') if request.method == 'POST' else None
    if uploaded_file is None:
        return HttpResponse("No file uploaded", status=400)

    # File properties
    print(f"Name: {uploaded_file.name}")
    print(f"Size: {uploaded_file.size}")
    print(f"Content Type: {uploaded_file.content_type}")
    print(f"Charset: {uploaded_file.charset}")

    # Read file content
    content = uploaded_file.read()

    # Reset the file pointer so later consumers start from the beginning
    uploaded_file.seek(0)

    return HttpResponse("File uploaded successfully")
Django handles small and large files differently:
from django.core.files.uploadedfile import InMemoryUploadedFile, TemporaryUploadedFile
def handle_upload(request):
    """Show how in-memory and temporary-file uploads are handled differently.

    Args:
        request: The incoming HttpRequest; the upload is read from the
            ``file`` field.
    """
    upload = request.FILES['file']

    if isinstance(upload, InMemoryUploadedFile):
        # Small upload (< FILE_UPLOAD_MAX_MEMORY_SIZE): read it in one go
        print("File stored in memory")
        content = upload.read()
        return

    if isinstance(upload, TemporaryUploadedFile):
        # Large upload: Django spooled it to a temporary location
        print(f"Temporary file: {upload.temporary_file_path()}")

        # Walk the file chunk by chunk instead of loading it whole
        for _chunk in upload.chunks():
            # Process chunk
            pass
def examine_file(file_obj):
    """Print a file object's metadata, then read it whole and in chunks.

    Args:
        file_obj: Object exposing ``name``, ``size``, ``url``, ``closed``,
            ``read()``, ``seek()`` and ``chunks()``; ``mode`` is optional.
    """
    # Identification and location
    print(f"Name: {file_obj.name}")
    print(f"Size: {file_obj.size}")
    print(f"URL: {file_obj.url}")

    # Handle state; "mode" may be absent, hence the getattr fallback
    print(f"Closed: {file_obj.closed}")
    print(f"Mode: {getattr(file_obj, 'mode', 'N/A')}")

    # Whole-file read, then rewind so the chunked pass starts at byte 0
    whole = file_obj.read()
    file_obj.seek(0)

    # Chunked read, the memory-friendly option for large files
    pieces = list(file_obj.chunks())
from django.core.files.base import ContentFile
from django.core.files import File
import os
def manipulate_files():
    """Build two ContentFile objects: one from literal bytes, one copied from disk.

    Returns:
        tuple: ``(content_file, new_file)`` where the first wraps the literal
        greeting and the second wraps the bytes read from
        ``existing_file.txt``.
    """
    # In-memory file created straight from a bytes literal
    hello_file = ContentFile(b"Hello, World!", name='hello.txt')

    # Wrap an on-disk file in Django's File API and pull its bytes out
    with open('existing_file.txt', 'rb') as source:
        wrapped = File(source, name='django_file.txt')
        payload = wrapped.read()

    # New, independent file carrying the copied bytes
    copy_file = ContentFile(payload, name='copied_file.txt')

    return hello_file, copy_file
def read_file_content(file_field):
    """Demonstrate three ways to read the content behind a file field.

    Args:
        file_field: Object exposing ``open(mode)`` (returning a context
            manager), ``read()``, ``close()`` and ``chunks()``.

    Returns:
        The full content as read by the context-manager variant.
    """
    # Method 1: explicit open/close. try/finally guarantees the handle is
    # released even when read() raises.
    file_field.open('rb')
    try:
        content = file_field.read()
    finally:
        file_field.close()

    # Method 2: context manager closes the file automatically
    with file_field.open('rb') as f:
        content = f.read()

    # Method 3: read in chunks (for large files)
    with file_field.open('rb') as f:
        chunks = list(f.chunks())

    return content
# Usage with a model instance. `Document` is not defined in this snippet;
# presumably it is a model with a file field named `file` (confirm against
# your own models).
document = Document.objects.get(pk=1)
content = read_file_content(document.file)
from django.core.files.base import ContentFile
def create_and_save_file(model_instance):
    """Generate a small text file and store it on ``model_instance.file``.

    Args:
        model_instance: Object whose ``file`` attribute exposes
            ``save(name, content, save=...)``.
    """
    # Imported here so the function does not depend on a module-level
    # import of timezone being present.
    from django.utils import timezone

    # Generate content
    content = (
        "Generated file content\n"
        f"Created at: {timezone.now()}\n"
    )

    # Wrap the encoded text in a ContentFile
    file_content = ContentFile(content.encode('utf-8'))

    # Save to the model field; save=True also saves the model instance
    model_instance.file.save(
        'generated_file.txt',
        file_content,
        save=True,
    )
from PIL import Image
from django.core.files.base import ContentFile
import io
def process_image(image_field):
    """Create a JPEG thumbnail (at most 150x150) for an uploaded image.

    Args:
        image_field: File field exposing ``open(mode)`` (usable as a context
            manager) and ``name``.

    Returns:
        ContentFile: The JPEG-encoded thumbnail, named ``thumb_<basename>``.
    """
    import os

    thumbnail_size = (150, 150)
    output = io.BytesIO()

    with image_field.open('rb') as f:
        image = Image.open(f)

        # Resize while the file is still open: the image is decoded from f
        image.thumbnail(thumbnail_size, Image.Resampling.LANCZOS)

        # JPEG cannot store alpha or palette data (e.g. PNG/GIF uploads in
        # RGBA, LA or P mode), so convert those before saving.
        if image.mode not in ('L', 'RGB', 'CMYK'):
            image = image.convert('RGB')

        image.save(output, format='JPEG', quality=85)

    # Prefix only the filename; image_field.name may include a directory
    # part, which would otherwise yield names like "thumb_photos/pic.jpg".
    base_name = os.path.basename(image_field.name)

    return ContentFile(output.getvalue(), name=f"thumb_{base_name}")
# Usage in model
class Photo(models.Model):
    """Photo model that generates its thumbnail on first save."""

    original = models.ImageField(upload_to='photos/')
    thumbnail = models.ImageField(upload_to='thumbnails/', blank=True)

    def save(self, *args, **kwargs):
        """Save the photo, then create and store a thumbnail if none exists."""
        super().save(*args, **kwargs)

        needs_thumbnail = bool(self.original) and not self.thumbnail
        if not needs_thumbnail:
            return

        # Second save writes only the thumbnail column
        self.thumbnail = process_image(self.original)
        super().save(update_fields=['thumbnail'])
import csv
import json
from django.core.files.base import ContentFile
def process_csv_file(csv_file):
    """Parse an uploaded CSV file and return its cleaned rows as JSON.

    Args:
        csv_file: Binary file-like object supporting ``seek()`` and
            ``read()``, containing UTF-8 CSV data with a header row.

    Returns:
        tuple: ``(json_file, processed_data)`` where ``json_file`` is a
        ContentFile named ``processed_data.json`` and ``processed_data`` is
        the list of cleaned row dicts it was built from.
    """
    import io

    # Imported here so the function does not depend on a module-level
    # import of timezone being present.
    from django.utils import timezone

    csv_file.seek(0)
    # utf-8-sig drops a leading BOM (common in spreadsheet exports) that
    # would otherwise become part of the first header name.
    decoded_file = csv_file.read().decode('utf-8-sig')

    # StringIO with newline='' lets the csv module see the raw line breaks,
    # so quoted fields that span several lines are parsed correctly.
    csv_reader = csv.DictReader(io.StringIO(decoded_file, newline=''))

    # DictReader fills columns missing from a short row with None, so
    # "or ''" is needed; a .get() default does not apply when the key exists.
    processed_data = [
        {
            'name': (row.get('name') or '').strip(),
            'email': (row.get('email') or '').lower(),
            'processed_at': timezone.now().isoformat(),
        }
        for row in csv_reader
    ]

    # Create JSON output
    json_content = json.dumps(processed_data, indent=2)
    json_file = ContentFile(
        json_content.encode('utf-8'),
        name='processed_data.json',
    )

    return json_file, processed_data
import magic
from django.core.exceptions import ValidationError
def validate_file_content(file_obj):
    """Validate a file by sniffing its actual content type.

    Args:
        file_obj: Binary file-like object supporting ``seek()`` and
            ``read(size)``.

    Returns:
        bool: ``True`` when the detected MIME type is allowed.

    Raises:
        ValidationError: If the detected MIME type is not in the allow-list.
    """
    allowed_types = {
        'image/jpeg',
        'image/png',
        'application/pdf',
        'text/plain',
    }

    # Read the file header. python-magic recommends passing at least the
    # first 2048 bytes; fewer can produce incorrect identification.
    file_obj.seek(0)
    file_header = file_obj.read(2048)
    # Rewind so later consumers see the file from the start
    file_obj.seek(0)

    # Use python-magic to detect the file type from the bytes themselves
    file_type = magic.from_buffer(file_header, mime=True)

    if file_type not in allowed_types:
        raise ValidationError(f"File type {file_type} not allowed")

    return True
def scan_for_malware(file_obj):
    """Run a naive signature scan over the whole file.

    Args:
        file_obj: Binary file-like object supporting ``seek()`` and ``read()``.

    Returns:
        bool: ``True`` when none of the signatures is found.

    Raises:
        ValidationError: If any signature occurs in the content
            (case-insensitive).
    """
    # Byte patterns treated as suspicious
    signatures = (b'<script', b'javascript:', b'<?php')

    # Read everything, then rewind so the caller can reuse the file
    file_obj.seek(0)
    raw = file_obj.read()
    file_obj.seek(0)

    # Lower-case once so the match ignores case
    haystack = raw.lower()
    if any(marker in haystack for marker in signatures):
        raise ValidationError("Potentially malicious content detected")

    return True
from django.core.files.base import File
class ProcessedFile(File):
    """File subclass that lazily runs an optional processor over its content."""

    def __init__(self, file, name=None, processor=None):
        """Wrap ``file`` and remember the optional ``processor`` callable."""
        super().__init__(file, name)
        self._processed_content = None  # cache, filled on first access
        self.processor = processor

    @property
    def processed_content(self):
        """Return the processed content, computing and caching it on first use."""
        if self._processed_content is not None:
            return self._processed_content

        # Read from the start and rewind afterwards
        self.seek(0)
        raw = self.read()
        self.seek(0)

        # Without a processor the raw content is used unchanged
        self._processed_content = self.processor(raw) if self.processor else raw
        return self._processed_content

    def save_processed(self, name, storage=None):
        """Save the processed content under ``name`` and return the storage's result.

        Uses ``storage`` when given, otherwise Django's default storage.
        """
        processed_file = ContentFile(self.processed_content, name=name)

        if not storage:
            from django.core.files.storage import default_storage
            storage = default_storage

        return storage.save(name, processed_file)
from django.http import StreamingHttpResponse
import os
def stream_file(request, file_path):
    """Stream a file from disk to the client as a download.

    NOTE(review): ``file_path`` is opened exactly as given. If it is derived
    from the URL or other user input, it must first be resolved against a
    trusted base directory to prevent path traversal.

    Args:
        request: The incoming HttpRequest (unused).
        file_path: Filesystem path of the file to send.

    Returns:
        StreamingHttpResponse: The file sent in 8 KiB chunks as an attachment.

    Raises:
        Http404: If ``file_path`` is not an existing regular file.
    """
    from django.http import Http404

    def file_iterator(path, chunk_size=8192):
        # Yield fixed-size chunks until read() returns an empty bytes object
        with open(path, 'rb') as f:
            yield from iter(lambda: f.read(chunk_size), b'')

    # A missing path or a directory becomes a 404 rather than an unhandled
    # OSError (HTTP 500).
    if not os.path.isfile(file_path):
        raise Http404("File not found")

    # Get file info
    file_size = os.path.getsize(file_path)
    filename = os.path.basename(file_path)

    # Escape backslashes and quotes so the name cannot break out of the
    # quoted-string in the Content-Disposition header.
    safe_filename = filename.replace('\\', '\\\\').replace('"', '\\"')

    # Create streaming response
    response = StreamingHttpResponse(
        file_iterator(file_path),
        content_type='application/octet-stream',
    )
    response['Content-Length'] = file_size
    response['Content-Disposition'] = f'attachment; filename="{safe_filename}"'

    return response
Best practices: use chunks() for large files, and call seek(0) when needed to reset the file pointer before reading again.
Now that you understand Django's File object, let's explore storage backends and how to configure different storage systems for your files.
Files in Models
Django provides specialized model fields for handling file uploads. These fields integrate seamlessly with Django's storage system and provide built-in validation and security features.
Storage Backends
Django's storage system provides an abstraction layer for file operations, allowing you to switch between different storage backends without changing your application code. This flexibility is essential for scalable applications that need to handle files efficiently.