platform-deployments/nginx/ml-services.conf
Quinn Ftw 38fc44bc26 feat(image-generation): add SDXL service to docker compose and nginx
Docker:
- Add image-generation service with GPU support (cuda:0/cuda:1)
- Configure model cache and job persistence volumes
- Set up Redis for job queue (db 2)
- Health check on port 8002

Nginx:
- Update upstream to image-generation:8002
- Add new API endpoints for health, models, jobs
- Configure proper timeouts for generation (120s)
- Add WebSocket support for progress streaming

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 19:23:01 -08:00

587 lines
20 KiB
Text

# =============================================================================
# ML Services Nginx Configuration
# =============================================================================
# Purpose: Reverse proxy configuration for Machine Learning microservices
# Services: Watermarking, Content Moderation, Image Generation
#
# This configuration handles:
# - Request routing to ML service backends
# - Load balancing across multiple instances
# - Health check endpoints
# - CORS headers for frontend integration
# - Extended timeouts for ML processing
# - Rate limiting to prevent abuse
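# - WebSocket support for streaming responses (planned: no location below
#   forwards the Upgrade/Connection headers yet, so upgrades are not proxied)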
# =============================================================================
# =============================================================================
# UPSTREAM DEFINITIONS
# =============================================================================
# Define backend service pools with load balancing and health checks
# -----------------------------------------------------------------------------
# ML Watermarking Service (Port 5000)
# -----------------------------------------------------------------------------
# Handles invisible watermark embedding and extraction
# Average processing time: 500ms-2s depending on image size
upstream ml_watermarking_backend {
    # Balancing method: send each request to the server that currently has
    # the fewest active connections, which suits requests whose duration
    # varies a lot. Must be declared before `keepalive`.
    least_conn;

    # The only instance deployed today.
    # NOTE: nginx ignores max_fails / fail_timeout while the group holds a
    # single server; they start to matter once the instances below are enabled.
    server ml-watermarking-service:5000 max_fails=3 fail_timeout=30s;

    # Scale-out instances -- uncomment after the containers are deployed.
    # server ml-watermarking-service-2:5000 max_fails=3 fail_timeout=30s;
    # server ml-watermarking-service-3:5000 max_fails=3 fail_timeout=30s;

    # Keep up to 32 idle upstream connections cached per worker process.
    # Only locations that use proxy_http_version 1.1 with an empty
    # Connection header actually reuse them.
    keepalive 32;
}
# -----------------------------------------------------------------------------
# ML Content Moderation Service (Port 5001)
# -----------------------------------------------------------------------------
# CSAM detection, NSFW classification, PDQ hashing
# Average processing time: 300ms-1s depending on model complexity
upstream ml_moderation_backend {
    # Route to the server with the fewest active connections.
    # Must be declared before `keepalive`.
    least_conn;

    # The only instance deployed today.
    # NOTE: nginx ignores max_fails / fail_timeout while the group holds a
    # single server; they start to matter once the instances below are enabled.
    server ml-moderation-service:5001 max_fails=3 fail_timeout=30s;

    # Scale-out instances -- uncomment after the containers are deployed.
    # server ml-moderation-service-2:5001 max_fails=3 fail_timeout=30s;
    # server ml-moderation-service-3:5001 max_fails=3 fail_timeout=30s;

    # Keep up to 32 idle upstream connections cached per worker process.
    keepalive 32;
}
# -----------------------------------------------------------------------------
# ML Image Generation Service (Port 8002)
# -----------------------------------------------------------------------------
# SDXL-based image generation for product photos and marketing assets
# Average processing time: 5s-30s depending on model and parameters
upstream ml_image_generation_backend {
    # Route to the server with the fewest active connections.
    # Must be declared before `keepalive`.
    least_conn;

    # The only instance deployed today.
    # NOTE: nginx ignores max_fails / fail_timeout while the group holds a
    # single server; they start to matter once the instances below are enabled.
    server image-generation:8002 max_fails=3 fail_timeout=30s;

    # Scale-out instances -- uncomment after the containers are deployed.
    # server image-generation-2:8002 max_fails=3 fail_timeout=30s;
    # server image-generation-3:8002 max_fails=3 fail_timeout=30s;

    # Keep up to 32 idle upstream connections cached per worker process.
    keepalive 32;
}
# =============================================================================
# RATE LIMITING ZONES
# =============================================================================
# Define shared memory zones for rate limiting to prevent abuse
# Shared-memory zones, all keyed by the client address in binary form.
# A zone only stores per-client state; the limit is enforced wherever a
# location or server references it with limit_req / limit_conn.

# Concurrent-connection tracking (the cap itself is set by `limit_conn`).
limit_conn_zone $binary_remote_addr zone=ml_conn_limit:10m;

# Image generation: 10 requests/minute per IP (very resource-intensive).
limit_req_zone $binary_remote_addr zone=generation_limit:10m rate=10r/m;

# Watermarking: 30 requests/minute per IP (resource-intensive).
limit_req_zone $binary_remote_addr zone=watermark_limit:10m rate=30r/m;

# Moderation: 60 requests/minute per IP (critical path).
limit_req_zone $binary_remote_addr zone=moderation_limit:10m rate=60r/m;

# General API traffic: 100 requests/minute per IP.
limit_req_zone $binary_remote_addr zone=ml_api_limit:10m rate=100r/m;
# =============================================================================
# SERVER BLOCK
# =============================================================================
# Main server configuration for ML services routing
server {
# Plain-HTTP listener (terminate TLS on 443 for production) and the
# virtual host name this block answers for.
listen 80;
server_name ml-services.lilith.local;

# Per-server log files; the error log records `warn` and above.
access_log /var/log/nginx/ml-services-access.log;
error_log /var/log/nginx/ml-services-error.log warn;

# At most 10 simultaneous connections per client address
# (state is tracked in the ml_conn_limit zone).
limit_conn ml_conn_limit 10;

# Request bodies: accept uploads up to 50 MB and hold the first 1 MB in
# memory before spilling to a temporary file.
client_max_body_size 50M;
client_body_buffer_size 1M;

# Upstream responses: 32k for the first part of the response (headers),
# then eight 16k buffers for the body.
proxy_buffer_size 32k;
proxy_buffers 8 16k;
# =========================================================================
# WATERMARKING SERVICE ENDPOINTS
# =========================================================================
# Watermark embedding: POST an image, proxied to the watermarking pool.
location /api/v1/watermarking/embed {
    # Throttle per client; up to 5 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=watermark_limit burst=5 nodelay;

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }

    # CORS preflight is answered by nginx itself and never reaches the backend.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true", so any site may issue credentialed requests;
    # consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # HTTP/1.1 plus an empty Connection header lets nginx reuse the
    # upstream keepalive connections.
    proxy_http_version 1.1;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header Connection "";

    # 30-second budget for connecting, sending and reading.
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    proxy_pass http://ml_watermarking_backend/api/v1/watermarking/embed;
}
# Watermark extraction: POST an image, proxied to the watermarking pool.
location /api/v1/watermarking/extract {
    # Throttle per client; up to 5 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=watermark_limit burst=5 nodelay;

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }

    # CORS preflight is answered by nginx itself and never reaches the backend.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    proxy_http_version 1.1;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header Connection "";

    # 30-second budget for connecting, sending and reading.
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    proxy_pass http://ml_watermarking_backend/api/v1/watermarking/extract;
}
# Watermarking liveness probe: maps to the backend's /health route.
location /api/v1/watermarking/health {
    # Probes are not logged, rate limited, or given CORS headers.
    access_log off;

    # Read-only endpoint: GET (and therefore HEAD) only.
    limit_except GET {
        deny all;
    }

    proxy_set_header Host $host;

    # Fail fast: a probe that takes longer than 5 seconds counts as down.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    proxy_pass http://ml_watermarking_backend/health;
}
# =========================================================================
# CONTENT MODERATION SERVICE ENDPOINTS
# =========================================================================
# Content analysis (CSAM / NSFW / policy violations), proxied to the
# moderation pool.
location /api/v1/moderation/analyze {
    # Throttle per client; up to 10 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=moderation_limit burst=10 nodelay;

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }

    # CORS preflight is answered by nginx itself and never reaches the backend.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    proxy_http_version 1.1;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header Connection "";

    # 30-second budget for connecting, sending and reading.
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    proxy_pass http://ml_moderation_backend/api/v1/moderation/analyze;
}
# PDQ perceptual-hash generation, proxied to the moderation pool.
location /api/v1/moderation/pdq-hash {
    # Throttle per client; up to 10 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=moderation_limit burst=10 nodelay;

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }

    # CORS preflight is answered by nginx itself and never reaches the backend.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    proxy_http_version 1.1;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header Connection "";

    # 30-second budget for connecting, sending and reading.
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    proxy_pass http://ml_moderation_backend/api/v1/moderation/pdq-hash;
}
# Moderation liveness probe: maps to the backend's /health route.
location /api/v1/moderation/health {
    # Probes are not logged, rate limited, or given CORS headers.
    access_log off;

    # Read-only endpoint: GET (and therefore HEAD) only.
    limit_except GET {
        deny all;
    }

    proxy_set_header Host $host;

    # Fail fast: a probe that takes longer than 5 seconds counts as down.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    proxy_pass http://ml_moderation_backend/health;
}
# =========================================================================
# IMAGE GENERATION SERVICE ENDPOINTS
# =========================================================================
# Image-generation liveness probe (the backend reports GPU status here):
# maps to the backend's /health route.
location /api/image-generation/health {
    # Probes are not logged, rate limited, or given CORS headers.
    access_log off;

    # Read-only endpoint: GET (and therefore HEAD) only.
    limit_except GET {
        deny all;
    }

    proxy_set_header Host $host;

    # Fail fast: a probe that takes longer than 5 seconds counts as down.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    proxy_pass http://ml_image_generation_backend/health;
}
# List available models: GET, forwarded to the backend's /models route.
location /api/image-generation/models {
    proxy_pass http://ml_image_generation_backend/models;

    # Metadata lookup, so keep the timeouts short.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    # Forward the original host and client details, matching the other
    # image-generation routes.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # HTTP/1.1 with an empty Connection header is required for nginx to
    # reuse the upstream keepalive pool; without it every request opens
    # and closes its own backend connection.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "GET, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
    # The Allow-Origin value depends on the request's Origin, so caches
    # must not share one stored GET response between different origins.
    add_header Vary Origin always;

    # CORS preflight is answered by nginx itself.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Read-only endpoint: GET (and therefore HEAD) plus OPTIONS only.
    limit_except GET OPTIONS {
        deny all;
    }
}
# List available layouts: GET, forwarded to the backend's /layouts route.
location /api/image-generation/layouts {
    proxy_pass http://ml_image_generation_backend/layouts;

    # Metadata lookup, so keep the timeouts short.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    # Forward the original host and client details, matching the other
    # image-generation routes.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # HTTP/1.1 with an empty Connection header is required for nginx to
    # reuse the upstream keepalive pool; without it every request opens
    # and closes its own backend connection.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "GET, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
    # The Allow-Origin value depends on the request's Origin, so caches
    # must not share one stored GET response between different origins.
    add_header Vary Origin always;

    # CORS preflight is answered by nginx itself.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Read-only endpoint: GET (and therefore HEAD) plus OPTIONS only.
    limit_except GET OPTIONS {
        deny all;
    }
}
# Generate a single image synchronously: POST, forwarded to the backend's
# /generate route.
location /api/image-generation/generate {
    # Throttle per client; at most 2 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=generation_limit burst=2 nodelay;

    proxy_pass http://ml_image_generation_backend/generate;

    # Connect timeout only covers establishing the TCP connection, not the
    # generation itself, and nginx documents that it cannot usually exceed
    # 75 seconds. Keep it short so an unreachable backend fails fast.
    proxy_connect_timeout 10s;
    # Generation can be slow: allow up to 2 minutes between successive
    # writes to / reads from the backend.
    proxy_send_timeout 120s;
    proxy_read_timeout 120s;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # CORS preflight is answered by nginx itself.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }
}
# Generate a batch of images synchronously: POST, forwarded to the
# backend's /generate/batch route.
location /api/image-generation/generate/batch {
    # Throttle per client; only 1 excess request is served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=generation_limit burst=1 nodelay;

    proxy_pass http://ml_image_generation_backend/generate/batch;

    # Connect timeout only covers establishing the TCP connection, not the
    # generation itself, and nginx documents that it cannot usually exceed
    # 75 seconds. Keep it short so an unreachable backend fails fast.
    proxy_connect_timeout 10s;
    # Batches are slow: allow up to 5 minutes between successive
    # writes to / reads from the backend.
    proxy_send_timeout 300s;
    proxy_read_timeout 300s;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # CORS preflight is answered by nginx itself.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }
}
# Submit an asynchronous generation job: POST, forwarded to the backend's
# /generate/async route.
location /api/image-generation/generate/async {
    # Throttle per client; up to 5 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=generation_limit burst=5 nodelay;

    # Any method other than POST or OPTIONS is refused.
    limit_except POST OPTIONS {
        deny all;
    }

    # CORS preflight is answered by nginx itself and never reaches the backend.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    proxy_http_version 1.1;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header Connection "";

    # Short 10-second timeouts -- presumably the backend only enqueues the
    # job and replies right away; confirm against the service.
    proxy_connect_timeout 10s;
    proxy_send_timeout 10s;
    proxy_read_timeout 10s;

    proxy_pass http://ml_image_generation_backend/generate/async;
}
# Job management endpoints: everything under /api/image-generation/jobs is
# forwarded to the backend's /jobs routes.
#
# This is a prefix location with a URI on proxy_pass so that the public
# /api/image-generation prefix is stripped, exactly as the health, models,
# layouts and generate routes do. A regex location cannot carry a URI on
# proxy_pass, so the earlier regex form forwarded the full
# /api/image-generation/jobs... path to the backend unchanged.
location /api/image-generation/jobs {
    # General API throttle; up to 10 excess requests are served immediately
    # (nodelay) before further ones are rejected.
    limit_req zone=ml_api_limit burst=10 nodelay;

    # /api/image-generation/jobs/<rest> -> /jobs/<rest>
    proxy_pass http://ml_image_generation_backend/jobs;

    # 30-second budget for connecting, sending and reading.
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    # Pass the original host and client details to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # HTTP/1.1 plus an empty Connection header enables upstream keepalive.
    # NOTE(review): because Connection is cleared and no Upgrade header is
    # forwarded, WebSocket upgrades (e.g. job progress streams, if they live
    # under this path) are not proxied. Supporting them needs an http-level
    # map from $http_upgrade to the Connection value.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers, attached to every status code (`always`).
    # NOTE(review): the caller's Origin is echoed back together with
    # Allow-Credentials "true"; consider restricting this to an allow-list.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "GET, POST, DELETE, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # CORS preflight is answered by nginx itself.
    if ($request_method = OPTIONS) {
        return 204;
    }
}
# =========================================================================
# COMBINED HEALTH CHECK ENDPOINT
# =========================================================================
# Combined health endpoint for the ML gateway.
# The body is a fixed JSON document served by nginx itself: it confirms the
# proxy is up and lists the routed services, but it does NOT probe the
# backends (real status aggregation belongs in an application).
location /api/v1/ml/health {
    access_log off;

    # `return` with a body takes its Content-Type from default_type. Do not
    # also add_header Content-Type here, as that emits a second, duplicate
    # Content-Type header.
    default_type application/json;
    return 200 '{"status":"ok","services":["watermarking","moderation","image-generation"]}';
}
# =========================================================================
# ERROR HANDLING
# =========================================================================
# JSON error bodies for gateway failures and throttling.

# limit_req and limit_conn reject with 503 unless told otherwise. Without
# these two directives the 429 handler below is never reached and throttled
# clients receive the "service unavailable" body instead.
limit_req_status 429;
limit_conn_status 429;

# Upstream unreachable / timed out / overloaded.
error_page 502 503 504 /50x.html;
location = /50x.html {
    # Reachable only through error_page, not by requesting the URI directly.
    internal;
    default_type application/json;
    return 503 '{"error":"ML service temporarily unavailable","message":"The requested ML service is currently processing other requests. Please try again shortly."}';
}

# Client exceeded a request-rate or connection limit.
error_page 429 /429.html;
location = /429.html {
    # Reachable only through error_page, not by requesting the URI directly.
    internal;
    default_type application/json;
    return 429 '{"error":"Rate limit exceeded","message":"Too many requests. Please wait before trying again."}';
}
}
# =============================================================================
# PRODUCTION RECOMMENDATIONS
# =============================================================================
#
# 1. HTTPS Configuration:
# - Enable SSL/TLS with valid certificates
# - Use HTTP/2 for better performance
# - Add HSTS header for security
#
# 2. Rate Limiting Tuning:
# - Adjust rates based on actual service capacity
# - Consider different limits for authenticated vs anonymous users
# - Implement burst allowances based on usage patterns
#
# 3. Monitoring:
# - Enable detailed access logs with response times
# - Export metrics to Prometheus via nginx-prometheus-exporter
# - Set up alerts for 5xx errors and high latency
#
# 4. Caching:
# - Consider caching identical requests (same image + params)
# - Implement Redis cache layer before ML services
# - Use ETag headers for conditional requests
#
# 5. Security:
# - Implement authentication at nginx level or backend
# - Add request signing for inter-service communication
# - Enable ModSecurity WAF for protection against attacks
# - Whitelist known IP ranges if possible
#
# 6. High Availability:
# - Deploy multiple instances of each ML service
# - Use health checks to detect failed backends
# - Implement circuit breakers for graceful degradation
# - Set up automatic failover and service discovery
#
# 7. Performance Optimization:
# - Enable gzip compression for JSON responses
# - Use preload hints for critical resources (HTTP/2 server push was removed
#   from nginx in 1.25.1 and is no longer supported by major browsers)
# - Implement request queuing for overloaded services
# - Consider GPU-accelerated ML inference
#
# =============================================================================