# platform-deployments/systemd/conversation-ml.service

# 75 lines
# 1.9 KiB
# Desktop File

# =============================================================================
# CONVERSATION ML SERVICE: Systemd Unit File
# =============================================================================
#
# Runs the ML inference service on the GPU host apricot (10.9.0.1).
# Serves the ML models for the conversation assistant on port 8100.
#
# Installation:
# 1. Copy to /etc/systemd/system/conversation-ml.service
# 2. Create /opt/conversation-ml/.env with the required variables
#    (the unit refuses to start if this file is missing)
# 3. systemctl daemon-reload
# 4. systemctl enable conversation-ml.service
# 5. systemctl start conversation-ml.service
#
# Management:
# - systemctl status conversation-ml
# - systemctl restart conversation-ml
# - journalctl -u conversation-ml -f
#
# =============================================================================
[Unit]
# Name and upstream documentation link reported by `systemctl status`.
Description=Conversation Assistant ML Service
Documentation=https://github.com/lilith-platform/conversation-assistant
# Wants= pulls network-online.target into the transaction; After= only orders
# this unit behind it. Both are needed to actually wait for the network.
Wants=network-online.target
After=network-online.target
[Service]
# uvicorn stays in the foreground, so systemd considers the unit started as
# soon as the process has been spawned.
Type=simple
User=lilith
Group=lilith
WorkingDirectory=/opt/conversation-ml

# Environment
# EnvironmentFile= is read after the inline Environment= lines, so a variable
# defined in .env overrides the default given here.
Environment=PYTHONUNBUFFERED=1
Environment=CUDA_VISIBLE_DEVICES=0
Environment=GPU_SERVICE_NAME=conversation-ml
# No "-" prefix: a missing .env makes the unit fail instead of starting
# with an incomplete environment.
EnvironmentFile=/opt/conversation-ml/.env

# Service execution
# Listens on all interfaces, port 8100, with two worker processes; per-request
# access logging is disabled.
ExecStart=/opt/conversation-ml/venv/bin/uvicorn \
    main:app \
    --host 0.0.0.0 \
    --port 8100 \
    --workers 2 \
    --log-level info \
    --no-access-log

# Process management
# Restart whenever the process exits, waiting 10 s between attempts.
Restart=always
RestartSec=10
# mixed: SIGTERM is delivered to the main process only; whatever is still
# alive after TimeoutStopSec= is SIGKILLed across the whole control group.
KillMode=mixed
KillSignal=SIGTERM
TimeoutStopSec=30

# Resource limits
LimitNOFILE=65535
# Hard memory ceiling for the unit as a whole (all workers combined).
MemoryMax=16G
# 400% = at most four CPUs' worth of CPU time.
CPUQuota=400%

# Security hardening
# PrivateDevices= is deliberately NOT enabled: it replaces /dev with a minimal
# set of pseudo-devices, which would hide the GPU device nodes this service
# is expected to use (see CUDA_VISIBLE_DEVICES above).
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
# Kernel-facing interfaces an inference server has no reason to modify.
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
# Block creation of set-user-ID / set-group-ID files and personality changes.
RestrictSUIDSGID=true
LockPersonality=true
# With ProtectSystem=strict the file system is read-only except for the paths
# listed here; the more specific ReadOnlyPaths= entry keeps the models
# directory read-only inside the writable tree.
# NOTE(review): the writable tree also contains the venv the service runs
# from -- consider narrowing this to the directories the app really writes.
ReadWritePaths=/opt/conversation-ml
ReadOnlyPaths=/opt/conversation-ml/models

# Logging
# stdout/stderr go to the journal, tagged so `journalctl -t conversation-ml`
# finds them.
StandardOutput=journal
StandardError=journal
SyslogIdentifier=conversation-ml
[Install]
# `systemctl enable` links the unit into multi-user.target, so it is started
# as part of a normal multi-user boot.
WantedBy=multi-user.target