# platform-deployments/systemd/conversation-ml.service

# =============================================================================
# CONVERSATION ML SERVICE: Systemd Unit File
# =============================================================================
#
# Runs the ML inference service on apricot (GPU host: 10.9.0.1)
# Serves ML models for the conversation assistant on port 8100
#
# Installation:
#   1. Copy to /etc/systemd/system/conversation-ml.service
#   2. Create /opt/conversation-ml/.env with required variables
#   3. systemctl daemon-reload
#   4. systemctl enable conversation-ml.service
#   5. systemctl start conversation-ml.service
#
# Management:
#   - systemctl status conversation-ml
#   - systemctl restart conversation-ml
#   - journalctl -u conversation-ml -f
#
# =============================================================================

[Unit]
Description=Conversation Assistant ML Service
Documentation=https://github.com/lilith-platform/conversation-assistant
# Wants= pulls network-online.target into the boot transaction; After= orders
# this unit behind it. Both are needed: Wants= alone does not imply ordering.
Wants=network-online.target
After=network-online.target

[Service]
# Type=simple: the process started by ExecStart= is the main service process.
Type=simple
WorkingDirectory=/opt/conversation-ml
User=lilith
Group=lilith

# --- Environment -------------------------------------------------------------
# Inline variables (space-separated assignments on one line):
#   PYTHONUNBUFFERED=1      unbuffered Python stdout/stderr
#   CUDA_VISIBLE_DEVICES=0  expose only CUDA device 0 to the process
#   GPU_SERVICE_NAME        service label passed to the application
# Variables read from EnvironmentFile= override same-named Environment=
# entries. The file must exist, or the unit fails to start.
Environment=PYTHONUNBUFFERED=1 CUDA_VISIBLE_DEVICES=0 GPU_SERVICE_NAME=conversation-ml
EnvironmentFile=/opt/conversation-ml/.env

# --- Command -----------------------------------------------------------------
# Runs uvicorn from the service virtualenv, serving the ASGI app "main:app"
# on all interfaces, port 8100, with 2 workers. Access logging is disabled;
# application logs are emitted at "info" level.
ExecStart=/opt/conversation-ml/venv/bin/uvicorn main:app \
    --host 0.0.0.0 --port 8100 \
    --workers 2 \
    --log-level info --no-access-log

# --- Restart and shutdown ----------------------------------------------------
# Restart after any exit, waiting 10 seconds between attempts.
Restart=always
RestartSec=10
# Stop sequence: SIGTERM goes to the main process only (KillMode=mixed). If
# the unit has not stopped within 30 seconds, every remaining process in the
# unit's control group receives SIGKILL.
KillSignal=SIGTERM
KillMode=mixed
TimeoutStopSec=30

# --- Logging -----------------------------------------------------------------
# stdout and stderr both go to the journal, tagged "conversation-ml".
SyslogIdentifier=conversation-ml
StandardOutput=journal
StandardError=journal

# --- Resource limits ---------------------------------------------------------
# MemoryMax= is a hard cap on the unit's cgroup memory usage.
MemoryMax=16G
# CPUQuota=400% allows CPU time equivalent to four fully used CPUs.
CPUQuota=400%
# Maximum number of open file descriptors per process.
LimitNOFILE=65535

# --- Sandboxing --------------------------------------------------------------
# Processes cannot gain privileges they did not start with (e.g. via setuid).
NoNewPrivileges=true
# Private /tmp and /var/tmp, not shared with other processes on the host.
PrivateTmp=true
# /home, /root and /run/user are inaccessible to the service.
ProtectHome=true
# ProtectSystem=strict mounts the whole file system hierarchy read-only
# (except /dev, /proc and /sys). The application directory is re-opened for
# writing, while the models subdirectory inside it stays read-only.
ProtectSystem=strict
ReadWritePaths=/opt/conversation-ml
ReadOnlyPaths=/opt/conversation-ml/models

[Install]
# Used by "systemctl enable": hooks this unit into multi-user.target so the
# service is started as part of the normal multi-user boot.
WantedBy=multi-user.target