Docker: - Add image-generation service with GPU support (cuda:0/cuda:1) - Configure model cache and job persistence volumes - Set up Redis for job queue (db 2) - Health check on port 8002 Nginx: - Update upstream to image-generation:8002 - Add new API endpoints for health, models, jobs - Configure proper timeouts for generation (120s) - Add WebSocket support for progress streaming 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
587 lines
20 KiB
Text
587 lines
20 KiB
Text
# =============================================================================
|
|
# ML Services Nginx Configuration
|
|
# =============================================================================
|
|
# Purpose: Reverse proxy configuration for Machine Learning microservices
|
|
# Services: Watermarking, Content Moderation, Image Generation
|
|
#
|
|
# This configuration handles:
|
|
# - Request routing to ML service backends
|
|
# - Load balancing across multiple instances
|
|
# - Health check endpoints
|
|
# - CORS headers for frontend integration
|
|
# - Extended timeouts for ML processing
|
|
# - Rate limiting to prevent abuse
|
|
# - WebSocket support for streaming responses
|
|
# =============================================================================
|
|
|
|
# =============================================================================
|
|
# UPSTREAM DEFINITIONS
|
|
# =============================================================================
|
|
# Define backend service pools with load balancing and health checks
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# ML Watermarking Service (Port 5000)
|
|
# -----------------------------------------------------------------------------
|
|
# Handles invisible watermark embedding and extraction
|
|
# Average processing time: 500ms-2s depending on image size
|
|
upstream ml_watermarking_backend {
    # Pick the peer with the fewest in-flight requests. The balancing method
    # has to be declared ahead of `keepalive` below.
    least_conn;

    # Only active peer. After 3 failed attempts within 30s it is taken out of
    # rotation for the following 30s.
    server ml-watermarking-service:5000 max_fails=3 fail_timeout=30s;

    # Extra replicas; enable once the corresponding containers exist.
    # server ml-watermarking-service-2:5000 max_fails=3 fail_timeout=30s;
    # server ml-watermarking-service-3:5000 max_fails=3 fail_timeout=30s;

    # Keep up to 32 idle upstream connections open per worker for reuse.
    keepalive 32;
}
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# ML Content Moderation Service (Port 5001)
|
|
# -----------------------------------------------------------------------------
|
|
# CSAM detection, NSFW classification, PDQ hashing
|
|
# Average processing time: 300ms-1s depending on model complexity
|
|
upstream ml_moderation_backend {
    # Route each request to the peer with the fewest active connections.
    # Must precede the `keepalive` directive.
    least_conn;

    # Only active peer: 3 failures within 30s mark it unavailable for 30s.
    server ml-moderation-service:5001 max_fails=3 fail_timeout=30s;

    # Extra replicas; enable once the corresponding containers exist.
    # server ml-moderation-service-2:5001 max_fails=3 fail_timeout=30s;
    # server ml-moderation-service-3:5001 max_fails=3 fail_timeout=30s;

    # Pool of up to 32 idle upstream connections per worker.
    keepalive 32;
}
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# ML Image Generation Service (Port 8002)
|
|
# -----------------------------------------------------------------------------
|
|
# SDXL-based image generation for product photos and marketing assets
|
|
# Average processing time: 5s-30s depending on model and parameters
|
|
upstream ml_image_generation_backend {
    # Fewest-active-connections balancing; declared before `keepalive` as
    # nginx requires for non-default balancing methods.
    least_conn;

    # Only active peer: 3 failures within 30s mark it unavailable for 30s.
    server image-generation:8002 max_fails=3 fail_timeout=30s;

    # Extra replicas; enable once the corresponding containers exist.
    # server image-generation-2:8002 max_fails=3 fail_timeout=30s;
    # server image-generation-3:8002 max_fails=3 fail_timeout=30s;

    # Pool of up to 32 idle upstream connections per worker.
    keepalive 32;
}
|
|
|
|
# =============================================================================
|
|
# RATE LIMITING ZONES
|
|
# =============================================================================
|
|
# Define shared memory zones for rate limiting to prevent abuse
|
|
|
|
# All zones are keyed on the client address ($binary_remote_addr) and each
# reserves 10 MB of shared memory for its state.

# Per-IP request-rate zones, listed from strictest to most permissive.
limit_req_zone $binary_remote_addr zone=generation_limit:10m rate=10r/m;   # image generation
limit_req_zone $binary_remote_addr zone=watermark_limit:10m rate=30r/m;    # watermark embed/extract
limit_req_zone $binary_remote_addr zone=moderation_limit:10m rate=60r/m;   # moderation
limit_req_zone $binary_remote_addr zone=ml_api_limit:10m rate=100r/m;      # general API

# Per-IP concurrent-connection zone. The actual cap is applied where
# `limit_conn ml_conn_limit ...` is used.
limit_conn_zone $binary_remote_addr zone=ml_conn_limit:10m;
|
|
|
|
# =============================================================================
|
|
# SERVER BLOCK
|
|
# =============================================================================
|
|
# Main server configuration for ML services routing
|
|
|
|
server {
|
|
# Virtual host name and listener (plain HTTP on port 80).
server_name ml-services.lilith.local;
listen 80;

# Request bodies: accept uploads up to 50 MB, holding up to 1 MB in memory
# before spilling to a temporary file.
client_max_body_size 50M;
client_body_buffer_size 1M;

# Buffers used when reading responses from the proxied services.
proxy_buffer_size 32k;
proxy_buffers 8 16k;

# Allow at most 10 simultaneous connections per key of the ml_conn_limit zone.
limit_conn ml_conn_limit 10;

# nginx rejects throttled requests with 503 unless told otherwise. Use 429 so
# that rate/connection-limit rejections are served by the `error_page 429`
# handler ("Rate limit exceeded") instead of the 502/503/504 handler
# ("ML service temporarily unavailable").
limit_req_status 429;
limit_conn_status 429;

# Dedicated log files for this virtual host; error log records warn and above.
access_log /var/log/nginx/ml-services-access.log;
error_log /var/log/nginx/ml-services-error.log warn;
|
|
|
|
# =========================================================================
|
|
# WATERMARKING SERVICE ENDPOINTS
|
|
# =========================================================================
|
|
|
|
# Embed watermark into image
|
|
location /api/v1/watermarking/embed {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # CORS preflight is answered by nginx itself and never reaches the backend.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Per-IP throttle; up to 5 requests over the rate are served immediately,
    # anything beyond that is rejected.
    limit_req zone=watermark_limit burst=5 nodelay;

    # Upstream target with a 30-second budget for connect, send and read.
    proxy_pass http://ml_watermarking_backend/api/v1/watermarking/embed;
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # HTTP/1.1 with an empty Connection header lets the upstream keepalive
    # pool be reused.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Extract watermark from image
|
|
location /api/v1/watermarking/extract {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Shares the watermark rate zone with the embed endpoint; burst of 5,
    # served without delay.
    limit_req zone=watermark_limit burst=5 nodelay;

    # Upstream target with a 30-second budget for connect, send and read.
    proxy_pass http://ml_watermarking_backend/api/v1/watermarking/extract;
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Watermarking service health check
|
|
location /api/v1/watermarking/health {
    # Probe traffic is kept out of the access log and is not rate limited.
    access_log off;

    # Read-only endpoint: anything other than GET (and the implied HEAD) is denied.
    limit_except GET {
        deny all;
    }

    # The public path is mapped onto the backend's /health route.
    proxy_pass http://ml_watermarking_backend/health;
    proxy_set_header Host $host;

    # Fail fast: 5 seconds each for connect, send and read.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;
}
|
|
|
|
# =========================================================================
|
|
# CONTENT MODERATION SERVICE ENDPOINTS
|
|
# =========================================================================
|
|
|
|
# Analyze content for CSAM/NSFW/violations
|
|
location /api/v1/moderation/analyze {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Per-IP throttle from the moderation zone; burst of 10, served without delay.
    limit_req zone=moderation_limit burst=10 nodelay;

    # Upstream target with a 30-second budget for connect, send and read.
    proxy_pass http://ml_moderation_backend/api/v1/moderation/analyze;
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Generate PDQ hash for perceptual matching
|
|
location /api/v1/moderation/pdq-hash {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Shares the moderation rate zone with the analyze endpoint; burst of 10,
    # served without delay.
    limit_req zone=moderation_limit burst=10 nodelay;

    # Upstream target with a 30-second budget for connect, send and read.
    proxy_pass http://ml_moderation_backend/api/v1/moderation/pdq-hash;
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Moderation service health check
|
|
location /api/v1/moderation/health {
    # Probe traffic is kept out of the access log and is not rate limited.
    access_log off;

    # Read-only endpoint: anything other than GET (and the implied HEAD) is denied.
    limit_except GET {
        deny all;
    }

    # The public path is mapped onto the backend's /health route.
    proxy_pass http://ml_moderation_backend/health;
    proxy_set_header Host $host;

    # Fail fast: 5 seconds each for connect, send and read.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;
}
|
|
|
|
# =========================================================================
|
|
# IMAGE GENERATION SERVICE ENDPOINTS
|
|
# =========================================================================
|
|
|
|
# Health check with GPU status
|
|
location /api/image-generation/health {
    # Probe traffic is kept out of the access log and is not rate limited.
    access_log off;

    # Read-only endpoint: anything other than GET (and the implied HEAD) is denied.
    limit_except GET {
        deny all;
    }

    # The public path is mapped onto the backend's /health route.
    proxy_pass http://ml_image_generation_backend/health;
    proxy_set_header Host $host;

    # Fail fast: 5 seconds each for connect, send and read.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;
}
|
|
|
|
# List available models
|
|
location /api/image-generation/models {
    # Read-only listing: only GET (plus implied HEAD) and OPTIONS are allowed.
    limit_except GET OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # The public path is mapped onto the backend's /models route.
    proxy_pass http://ml_image_generation_backend/models;
    proxy_set_header Host $host;

    # Metadata lookup: 5 seconds each for connect, send and read.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    # Use HTTP/1.1 with an empty Connection header, as the generation routes
    # do, so these requests can reuse the upstream keepalive pool instead of
    # opening a new connection each time.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "GET, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;

    # The Allow-Origin value depends on the request's Origin header, so caches
    # must key this GET response on Origin.
    add_header Vary "Origin" always;
}
|
|
|
|
# List available layouts
|
|
location /api/image-generation/layouts {
    # Read-only listing: only GET (plus implied HEAD) and OPTIONS are allowed.
    limit_except GET OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # The public path is mapped onto the backend's /layouts route.
    proxy_pass http://ml_image_generation_backend/layouts;
    proxy_set_header Host $host;

    # Metadata lookup: 5 seconds each for connect, send and read.
    proxy_connect_timeout 5s;
    proxy_send_timeout 5s;
    proxy_read_timeout 5s;

    # Use HTTP/1.1 with an empty Connection header, as the generation routes
    # do, so these requests can reuse the upstream keepalive pool instead of
    # opening a new connection each time.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "GET, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;

    # The Allow-Origin value depends on the request's Origin header, so caches
    # must key this GET response on Origin.
    add_header Vary "Origin" always;
}
|
|
|
|
# Generate single image
|
|
location /api/image-generation/generate {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Per-IP throttle from the generation zone; burst of 2, served without delay.
    limit_req zone=generation_limit burst=2 nodelay;

    # The public path is mapped onto the backend's /generate route.
    proxy_pass http://ml_image_generation_backend/generate;

    # Only the transfer phases need the 2-minute generation budget.
    # Establishing the TCP connection should be quick, and nginx documents
    # that the connect timeout usually cannot exceed 75s anyway, so keep it
    # short to detect a dead backend promptly.
    proxy_connect_timeout 10s;
    proxy_send_timeout 120s;
    proxy_read_timeout 120s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Generate batch of images
|
|
location /api/image-generation/generate/batch {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Per-IP throttle from the generation zone; burst of 1, served without delay.
    limit_req zone=generation_limit burst=1 nodelay;

    # The public path is mapped onto the backend's /generate/batch route.
    proxy_pass http://ml_image_generation_backend/generate/batch;

    # Only the transfer phases need the 5-minute batch budget. Establishing
    # the TCP connection should be quick, and nginx documents that the connect
    # timeout usually cannot exceed 75s anyway, so keep it short to detect a
    # dead backend promptly.
    proxy_connect_timeout 10s;
    proxy_send_timeout 300s;
    proxy_read_timeout 300s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Create async generation job
|
|
location /api/image-generation/generate/async {
    # Method gate: everything except POST and OPTIONS is denied.
    limit_except POST OPTIONS {
        deny all;
    }

    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Per-IP throttle from the generation zone; burst of 5, served without delay.
    limit_req zone=generation_limit burst=5 nodelay;

    # The public path is mapped onto the backend's /generate/async route,
    # with a 10-second budget for connect, send and read.
    proxy_pass http://ml_image_generation_backend/generate/async;
    proxy_connect_timeout 10s;
    proxy_send_timeout 10s;
    proxy_read_timeout 10s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "POST, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;
}
|
|
|
|
# Job management endpoints
|
|
location /api/image-generation/jobs {
    # Answer CORS preflight locally.
    if ($request_method = OPTIONS) {
        return 204;
    }

    # Per-IP throttle from the general API zone; burst of 10, served without delay.
    limit_req zone=ml_api_limit burst=10 nodelay;

    # Swap the public prefix for /jobs, the same way the sibling routes map to
    # /health, /models, /layouts and /generate. The previous regex location
    # used a URI-less proxy_pass, which forwarded the full
    # /api/image-generation/jobs... path to the backend unchanged.
    # NOTE(review): assumes the backend serves job routes under /jobs like its
    # other root-level routes — confirm against the service's router.
    proxy_pass http://ml_image_generation_backend/jobs;

    # 30-second budget for connect, send and read.
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;

    # Pass the original host, client address and scheme to the backend.
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Required for reuse of upstream keepalive connections.
    # NOTE(review): the file header lists WebSocket streaming support, but
    # with the Connection header cleared and no Upgrade header forwarded, a
    # WebSocket handshake cannot pass through this location — confirm where
    # progress streaming is meant to be served.
    proxy_http_version 1.1;
    proxy_set_header Connection "";

    # CORS response headers; `always` attaches them to error responses too.
    # NOTE(review): the request Origin is echoed back while credentials are
    # allowed, so any origin is accepted — confirm this is intended.
    add_header Access-Control-Allow-Origin $http_origin always;
    add_header Access-Control-Allow-Methods "GET, POST, DELETE, OPTIONS" always;
    add_header Access-Control-Allow-Headers "Authorization, Content-Type, X-Request-ID" always;
    add_header Access-Control-Allow-Credentials "true" always;
    add_header Access-Control-Max-Age 3600 always;

    # The Allow-Origin value depends on the request's Origin header, so caches
    # must key GET responses on Origin.
    add_header Vary "Origin" always;
}
|
|
|
|
# =========================================================================
|
|
# COMBINED HEALTH CHECK ENDPOINT
|
|
# =========================================================================
|
|
|
|
# Aggregate health check for all ML services
|
|
location /api/v1/ml/health {
    # Static liveness response produced by nginx itself. It does not contact
    # any backend, so it only shows that the proxy is up.
    access_log off;

    # `default_type` already sets Content-Type for the `return` body. The
    # former `add_header Content-Type ...` emitted a second, duplicate
    # Content-Type header and has been dropped.
    default_type application/json;
    return 200 '{"status":"ok","services":["watermarking","moderation","image-generation"]}';
}
|
|
|
|
# =========================================================================
|
|
# ERROR HANDLING
|
|
# =========================================================================
|
|
|
|
# JSON error bodies. Each error_page directive redirects internally to a helper
# location that returns a fixed JSON payload. The helpers are marked `internal`
# so clients cannot request /50x.html or /429.html directly and receive a
# fabricated error response.

# Gateway and upstream failures.
error_page 502 503 504 /50x.html;
location = /50x.html {
    internal;
    default_type application/json;
    return 503 '{"error":"ML service temporarily unavailable","message":"The requested ML service is currently processing other requests. Please try again shortly."}';
}

# Throttled requests.
error_page 429 /429.html;
location = /429.html {
    internal;
    default_type application/json;
    return 429 '{"error":"Rate limit exceeded","message":"Too many requests. Please wait before trying again."}';
}
|
|
}
|
|
|
|
# =============================================================================
|
|
# PRODUCTION RECOMMENDATIONS
|
|
# =============================================================================
|
|
#
|
|
# 1. HTTPS Configuration:
|
|
# - Enable SSL/TLS with valid certificates
|
|
# - Use HTTP/2 for better performance
|
|
# - Add HSTS header for security
|
|
#
|
|
# 2. Rate Limiting Tuning:
|
|
# - Adjust rates based on actual service capacity
|
|
# - Consider different limits for authenticated vs anonymous users
|
|
# - Implement burst allowances based on usage patterns
|
|
#
|
|
# 3. Monitoring:
|
|
# - Enable detailed access logs with response times
|
|
# - Export metrics to Prometheus via nginx-prometheus-exporter
|
|
# - Set up alerts for 5xx errors and high latency
|
|
#
|
|
# 4. Caching:
|
|
# - Consider caching identical requests (same image + params)
|
|
# - Implement Redis cache layer before ML services
|
|
# - Use ETag headers for conditional requests
|
|
#
|
|
# 5. Security:
|
|
# - Implement authentication at nginx level or backend
|
|
# - Add request signing for inter-service communication
|
|
# - Enable ModSecurity WAF for protection against attacks
|
|
# - Whitelist known IP ranges if possible
|
|
#
|
|
# 6. High Availability:
|
|
# - Deploy multiple instances of each ML service
|
|
# - Use health checks to detect failed backends
|
|
# - Implement circuit breakers for graceful degradation
|
|
# - Set up automatic failover and service discovery
|
|
#
|
|
# 7. Performance Optimization:
|
|
# - Enable gzip compression for JSON responses
|
|
# - Do not rely on HTTP/2 server push (removed in nginx 1.25.1); use preload hints for critical resources instead
|
|
# - Implement request queuing for overloaded services
|
|
# - Consider GPU-accelerated ML inference
|
|
#
|
|
# =============================================================================
|