# platform-deployments/nginx/ml-services.conf

# =============================================================================
# ML Services Nginx Configuration
# =============================================================================
# Purpose: Reverse proxy configuration for Machine Learning microservices
# Services: Watermarking, Content Moderation, Image Generation
#
# This configuration handles:
# - Request routing to ML service backends
# - Load balancing across multiple instances
# - Health check endpoints
# - CORS headers for frontend integration
# - Extended timeouts for ML processing
# - Rate limiting to prevent abuse
# - HTTP/1.1 keepalive connections to the backends (note: no WebSocket
#   Upgrade handling is configured in this file yet)
# =============================================================================

# =============================================================================
# UPSTREAM DEFINITIONS
# =============================================================================
# Define backend service pools with load balancing and health checks

# -----------------------------------------------------------------------------
# ML Watermarking Service (Port 5000)
# -----------------------------------------------------------------------------
# Handles invisible watermark embedding and extraction
# Average processing time: 500ms-2s depending on image size
upstream ml_watermarking_backend {
    # Balancing policy: send each request to the peer that currently has the
    # fewest active connections. This suits ML calls whose duration varies a
    # lot. It has to be declared before "keepalive".
    least_conn;

    # Active peer. After 3 failed attempts within 30s the peer is taken out
    # of rotation for 30s.
    # NOTE: per the nginx docs these failure parameters are ignored while the
    # group contains only one server.
    server ml-watermarking-service:5000 fail_timeout=30s max_fails=3;

    # Scale-out peers - enable once the extra replicas are deployed
    # server ml-watermarking-service-2:5000 fail_timeout=30s max_fails=3;
    # server ml-watermarking-service-3:5000 fail_timeout=30s max_fails=3;

    # Cache up to 32 idle upstream connections per worker to save handshakes
    keepalive 32;
}

# -----------------------------------------------------------------------------
# ML Content Moderation Service (Port 5001)
# -----------------------------------------------------------------------------
# CSAM detection, NSFW classification, PDQ hashing
# Average processing time: 300ms-1s depending on model complexity
upstream ml_moderation_backend {
    # Route to the peer with the fewest active connections
    # (must precede "keepalive")
    least_conn;

    # Active peer: 3 failures within 30s sideline it for 30s.
    # NOTE: nginx ignores these failure parameters while the group has a
    # single server.
    server ml-moderation-service:5001 fail_timeout=30s max_fails=3;

    # Scale-out peers - enable once the extra replicas are deployed
    # server ml-moderation-service-2:5001 fail_timeout=30s max_fails=3;
    # server ml-moderation-service-3:5001 fail_timeout=30s max_fails=3;

    # Cache up to 32 idle upstream connections per worker
    keepalive 32;
}

# -----------------------------------------------------------------------------
# ML Image Generation Service (Port 8002)
# -----------------------------------------------------------------------------
# SDXL-based image generation for product photos and marketing assets
# Average processing time: 5s-30s depending on model and parameters
upstream ml_image_generation_backend {
    # Route to the peer with the fewest active connections
    # (must precede "keepalive")
    least_conn;

    # Active peer: 3 failures within 30s sideline it for 30s.
    # NOTE: nginx ignores these failure parameters while the group has a
    # single server.
    server image-generation:8002 fail_timeout=30s max_fails=3;

    # Scale-out peers - enable once the extra replicas are deployed
    # server image-generation-2:8002 fail_timeout=30s max_fails=3;
    # server image-generation-3:8002 fail_timeout=30s max_fails=3;

    # Cache up to 32 idle upstream connections per worker
    keepalive 32;
}

# =============================================================================
# RATE LIMITING ZONES
# =============================================================================
# Define shared memory zones for rate limiting to prevent abuse

# All zones are keyed on the client address in binary form and reserve 10 MB
# of shared memory each. Declaration order is irrelevant; the zones are
# listed from the strictest request rate to the most permissive.
# NOTE(review): if this proxy sits behind another load balancer, the key is
# that balancer's address unless the real client IP is restored - confirm.

# Concurrent-connection tracking (the per-IP cap itself is applied with
# "limit_conn" where the zone is used)
limit_conn_zone $binary_remote_addr zone=ml_conn_limit:10m;

# Image generation: 10 requests/minute per IP (very resource-intensive)
limit_req_zone $binary_remote_addr zone=generation_limit:10m rate=10r/m;

# Watermarking: 30 requests/minute per IP (resource-intensive)
limit_req_zone $binary_remote_addr zone=watermark_limit:10m rate=30r/m;

# Moderation: 60 requests/minute per IP (critical path)
limit_req_zone $binary_remote_addr zone=moderation_limit:10m rate=60r/m;

# General API traffic: 100 requests/minute per IP
limit_req_zone $binary_remote_addr zone=ml_api_limit:10m rate=100r/m;

# =============================================================================
# SERVER BLOCK
# =============================================================================
# Main server configuration for ML services routing

server {
    # Listening socket and virtual host name.
    # Plain HTTP on port 80 (use 443 for HTTPS in production).
    listen 80;
    server_name ml-services.lilith.local;

    # Logging: errors at "warn" and above, plus a dedicated access log
    error_log /var/log/nginx/ml-services-error.log warn;
    access_log /var/log/nginx/ml-services-access.log;

    # At most 10 simultaneous connections per client address
    limit_conn ml_conn_limit 10;

    # Request bodies: accept up to 50 MB (image uploads) and keep the first
    # 1 MB in memory before spilling to a temporary file
    client_max_body_size 50m;
    client_body_buffer_size 1m;

    # Buffers for upstream responses: 32k for the response header part,
    # then 8 buffers of 16k for the body
    proxy_buffer_size 32k;
    proxy_buffers 8 16k;

    # =========================================================================
    # WATERMARKING SERVICE ENDPOINTS
    # =========================================================================

    # Embed watermark into image
    location /api/v1/watermarking/embed {
        # Rate limiting with burst allowance: up to 5 extra requests are
        # served immediately, anything beyond that is rejected
        limit_req zone=watermark_limit burst=5 nodelay;

        # Proxy to watermarking backend (same path on the backend)
        proxy_pass http://ml_watermarking_backend/api/v1/watermarking/embed;

        # Extended timeout for ML processing (30 seconds)
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # HTTP/1.1 with an empty Connection header is required for the
        # upstream keepalive pool to be used
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers for frontend integration.
        # SECURITY NOTE(review): the caller's Origin is echoed back together
        # with Allow-Credentials "true", which lets any website issue
        # credentialed requests. Replace $http_origin with an allow-listed
        # value (for example from a "map" on $http_origin) before exposing
        # this endpoint publicly.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # The Allow-Origin value depends on the request's Origin header, so
        # caches must not reuse a response across origins
        add_header Vary Origin always;

        # Handle preflight requests (answered here, never proxied)
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only allow POST requests (plus OPTIONS for preflight)
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Extract watermark from image
    location /api/v1/watermarking/extract {
        # Shares the watermarking rate zone; bursts of up to 5 are served
        # without delay
        limit_req zone=watermark_limit burst=5 nodelay;

        # Same path on the watermarking backend
        proxy_pass http://ml_watermarking_backend/api/v1/watermarking/extract;

        # Allow up to 30 seconds for ML processing
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only POST (plus OPTIONS for preflight) is accepted
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Watermarking service health check
    location /api/v1/watermarking/health {
        # Probe traffic is kept out of the access log
        access_log off;

        # Read-only endpoint: every method except GET (and, implicitly,
        # HEAD) is refused
        limit_except GET {
            deny all;
        }

        # Forward to the backend's /health route with the original Host
        proxy_set_header Host $host;
        proxy_pass http://ml_watermarking_backend/health;

        # Fail fast: a health probe gets 5 seconds per phase
        proxy_read_timeout 5s;
        proxy_send_timeout 5s;
        proxy_connect_timeout 5s;

        # Deliberately no rate limiting and no CORS headers here:
        # the endpoint is meant for internal health checks
    }

    # =========================================================================
    # CONTENT MODERATION SERVICE ENDPOINTS
    # =========================================================================

    # Analyze content for CSAM/NSFW/violations
    location /api/v1/moderation/analyze {
        # Moderation rate zone; bursts of up to 10 are served without delay
        limit_req zone=moderation_limit burst=10 nodelay;

        # Same path on the moderation backend
        proxy_pass http://ml_moderation_backend/api/v1/moderation/analyze;

        # Extended timeout for ML processing
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only POST (plus OPTIONS for preflight) is accepted
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Generate PDQ hash for perceptual matching
    location /api/v1/moderation/pdq-hash {
        # Moderation rate zone; bursts of up to 10 are served without delay
        limit_req zone=moderation_limit burst=10 nodelay;

        # Same path on the moderation backend
        proxy_pass http://ml_moderation_backend/api/v1/moderation/pdq-hash;

        # Allow up to 30 seconds for processing
        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only POST (plus OPTIONS for preflight) is accepted
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Moderation service health check
    location /api/v1/moderation/health {
        # Probe traffic is kept out of the access log
        access_log off;

        # Read-only endpoint: every method except GET (and, implicitly,
        # HEAD) is refused
        limit_except GET {
            deny all;
        }

        # Forward to the backend's /health route with the original Host
        proxy_set_header Host $host;
        proxy_pass http://ml_moderation_backend/health;

        # Fail fast: a health probe gets 5 seconds per phase
        proxy_read_timeout 5s;
        proxy_send_timeout 5s;
        proxy_connect_timeout 5s;
    }

    # =========================================================================
    # IMAGE GENERATION SERVICE ENDPOINTS
    # =========================================================================

    # Health check with GPU status
    location /api/image-generation/health {
        # Probe traffic is kept out of the access log
        access_log off;

        # Read-only endpoint: every method except GET (and, implicitly,
        # HEAD) is refused
        limit_except GET {
            deny all;
        }

        # The public prefix is dropped: the backend serves this at /health
        proxy_set_header Host $host;
        proxy_pass http://ml_image_generation_backend/health;

        # Fail fast: a health probe gets 5 seconds per phase
        proxy_read_timeout 5s;
        proxy_send_timeout 5s;
        proxy_connect_timeout 5s;
    }

    # List available models
    location /api/image-generation/models {
        # The public prefix is dropped: the backend serves this at /models
        proxy_pass http://ml_image_generation_backend/models;

        # Cheap metadata lookup: short timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 5s;
        proxy_read_timeout 5s;

        proxy_set_header Host $host;

        # Use HTTP/1.1 with a cleared Connection header so the upstream
        # keepalive pool is used, as on the other proxied API endpoints
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "GET, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        # GET responses are cacheable and the Allow-Origin value depends on
        # the request's Origin header, so caches must key on it
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only GET (implicitly HEAD) and OPTIONS are accepted
        limit_except GET OPTIONS {
            deny all;
        }
    }

    # List available layouts
    location /api/image-generation/layouts {
        # The public prefix is dropped: the backend serves this at /layouts
        proxy_pass http://ml_image_generation_backend/layouts;

        # Cheap metadata lookup: short timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 5s;
        proxy_read_timeout 5s;

        proxy_set_header Host $host;

        # Use HTTP/1.1 with a cleared Connection header so the upstream
        # keepalive pool is used, as on the other proxied API endpoints
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "GET, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        # GET responses are cacheable and the Allow-Origin value depends on
        # the request's Origin header, so caches must key on it
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only GET (implicitly HEAD) and OPTIONS are accepted
        limit_except GET OPTIONS {
            deny all;
        }
    }

    # Generate single image
    location /api/image-generation/generate {
        # Generation rate zone; bursts of up to 2 are served without delay
        limit_req zone=generation_limit burst=2 nodelay;

        # The public prefix is dropped: the backend serves this at /generate
        proxy_pass http://ml_image_generation_backend/generate;

        # Establishing the TCP connection does not depend on generation
        # time (and nginx documents that this timeout usually cannot exceed
        # 75s), so fail fast when the backend is unreachable
        proxy_connect_timeout 10s;
        # Very long timeouts for sending the request and waiting for the
        # generated image (2 minutes)
        proxy_send_timeout 120s;
        proxy_read_timeout 120s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only POST (plus OPTIONS for preflight) is accepted
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Generate batch of images
    location /api/image-generation/generate/batch {
        # Generation rate zone; only 1 extra request may burst
        limit_req zone=generation_limit burst=1 nodelay;

        # The public prefix is dropped: the backend serves this at
        # /generate/batch
        proxy_pass http://ml_image_generation_backend/generate/batch;

        # Establishing the TCP connection does not depend on generation
        # time (and nginx documents that this timeout usually cannot exceed
        # 75s), so fail fast when the backend is unreachable
        proxy_connect_timeout 10s;
        # Extended timeouts for sending the request and waiting for the
        # batch result (5 minutes)
        proxy_send_timeout 300s;
        proxy_read_timeout 300s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only POST (plus OPTIONS for preflight) is accepted
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Create async generation job
    location /api/image-generation/generate/async {
        # Generation rate zone; bursts of up to 5 are served without delay
        limit_req zone=generation_limit burst=5 nodelay;

        # The public prefix is dropped: the backend serves this at
        # /generate/async
        proxy_pass http://ml_image_generation_backend/generate/async;

        # Short timeouts (10 seconds), unlike the synchronous generation
        # endpoints
        proxy_connect_timeout 10s;
        proxy_send_timeout 10s;
        proxy_read_timeout 10s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here
        if ($request_method = OPTIONS) {
            return 204;
        }

        # Only POST (plus OPTIONS for preflight) is accepted
        limit_except POST OPTIONS {
            deny all;
        }
    }

    # Job management endpoints
    # Prefix location (matches /api/image-generation/jobs and everything
    # below it). A regex location cannot carry a URI in proxy_pass, so the
    # previous regex form forwarded the full public path to the backend.
    location /api/image-generation/jobs {
        # General API rate zone; bursts of up to 10 are served without delay
        limit_req zone=ml_api_limit burst=10 nodelay;

        # Drop the public prefix like every other image-generation route:
        # /api/image-generation/jobs/<rest> is forwarded as /jobs/<rest>.
        # NOTE(review): assumes the backend exposes job routes under /jobs,
        # consistent with /health, /models, /layouts and /generate - confirm.
        proxy_pass http://ml_image_generation_backend/jobs;

        proxy_connect_timeout 30s;
        proxy_send_timeout 30s;
        proxy_read_timeout 30s;

        # Preserve original request information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Needed for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";

        # CORS headers.
        # SECURITY NOTE(review): echoing $http_origin together with
        # Allow-Credentials "true" trusts every origin; restrict it to an
        # allow-list before production use.
        add_header Access-Control-Allow-Origin $http_origin always;
        add_header Access-Control-Allow-Methods "GET, POST, DELETE, OPTIONS" always;
        add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
        add_header Access-Control-Allow-Credentials "true" always;
        add_header Access-Control-Max-Age 3600 always;
        # Response varies with the request's Origin header
        add_header Vary Origin always;

        # Preflight requests are answered here.
        # Note: unlike the other endpoints, no limit_except restricts the
        # HTTP methods forwarded to the backend.
        if ($request_method = OPTIONS) {
            return 204;
        }
    }

    # =========================================================================
    # COMBINED HEALTH CHECK ENDPOINT
    # =========================================================================

    # Aggregate health check for all ML services
    location /api/v1/ml/health {
        # Static liveness answer produced by nginx itself; the backends are
        # NOT consulted (implement real status aggregation in the
        # application).
        access_log off;

        # default_type supplies the Content-Type of the body returned below.
        # An additional "add_header Content-Type" must not be used here: it
        # would emit a second Content-Type header in the response.
        default_type application/json;
        return 200 '{"status":"ok","services":["watermarking","moderation","image-generation"]}';
    }

    # =========================================================================
    # ERROR HANDLING
    # =========================================================================

    # Custom error pages for ML service failures
    # Requests rejected by limit_req / limit_conn are answered with 503 by
    # default, which would bypass the 429 handler below and show throttled
    # clients the "service unavailable" message. Report them as 429 instead.
    limit_req_status 429;
    limit_conn_status 429;

    # Backend failures (bad gateway, unavailable, gateway timeout)
    error_page 502 503 504 /50x.html;
    location = /50x.html {
        # Reachable only through error_page, not by direct client requests
        internal;

        # The ".html" suffix of this URI would select text/html if a MIME
        # type map is loaded; clear the map so default_type applies to the
        # JSON body
        types { }
        default_type application/json;
        return 503 '{"error":"ML service temporarily unavailable","message":"The requested ML service is currently processing other requests. Please try again shortly."}';
    }

    # Rate or connection limit exceeded
    error_page 429 /429.html;
    location = /429.html {
        # Reachable only through error_page, not by direct client requests
        internal;

        # Same Content-Type safeguard as for the 50x handler
        types { }
        default_type application/json;
        return 429 '{"error":"Rate limit exceeded","message":"Too many requests. Please wait before trying again."}';
    }
}

# =============================================================================
# PRODUCTION RECOMMENDATIONS
# =============================================================================
#
# 1. HTTPS Configuration:
#    - Enable SSL/TLS with valid certificates
#    - Use HTTP/2 for better performance
#    - Add HSTS header for security
#
# 2. Rate Limiting Tuning:
#    - Adjust rates based on actual service capacity
#    - Consider different limits for authenticated vs anonymous users
#    - Implement burst allowances based on usage patterns
#
# 3. Monitoring:
#    - Enable detailed access logs with response times
#    - Export metrics to Prometheus via nginx-prometheus-exporter
#    - Set up alerts for 5xx errors and high latency
#
# 4. Caching:
#    - Consider caching identical requests (same image + params)
#    - Implement Redis cache layer before ML services
#    - Use ETag headers for conditional requests
#
# 5. Security:
#    - Implement authentication at nginx level or backend
#    - Add request signing for inter-service communication
#    - Enable ModSecurity WAF for protection against attacks
#    - Whitelist known IP ranges if possible
#
# 6. High Availability:
#    - Deploy multiple instances of each ML service
#    - Use health checks to detect failed backends
#    - Implement circuit breakers for graceful degradation
#    - Set up automatic failover and service discovery
#
# 7. Performance Optimization:
#    - Enable gzip compression for JSON responses
#    - Do not rely on HTTP/2 server push (support was removed in nginx 1.25.1
#      and by major browsers); use preload hints for critical resources
#    - Implement request queuing for overloaded services
#    - Consider GPU-accelerated ML inference
#
# =============================================================================