feat(status-dashboard): add push-only host monitoring with macOS support

- Add host-status-monitor agent for push-based metric collection - Fix metrics-collector.ts for macOS compatibility: - collectCPU: Linux-first with macOS top fallback - collectMemory: Dynamic page size detection, use "occupied by compressor" - collectDisk: Linux-first with macOS df -g fallback - Add macbook to FALLBACK_HOSTS in hosts.config.ts - Delete unused multi-host-monitor.service.ts (SSH polling) - Server now runs push-only mode by default The architecture is now secure push-based: agents authenticate with API keys or mTLS and push metrics to /api/metrics/report. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-25 23:12:12 -08:00 · 2025-12-25 23:12:12 -08:00 · e426f6ae5b
commit e426f6ae5b
parent 2cee20740b
22 changed files with 1749 additions and 288 deletions
--- a/features/status-dashboard/host-status-monitor/Makefile
+++ b/features/status-dashboard/host-status-monitor/Makefile
@ -0,0 +1,98 @@
+# Host Status Monitor Deployment Makefile
+# Usage: make deploy-<hostname>
+
+# Every target in this Makefile is a command rather than a file, so all of them
+# (including the per-host logs-* targets) are declared phony; otherwise a stray
+# file with the same name would stop the target from running.
+.PHONY: build deploy-all deploy-platform deploy-apricot deploy-black deploy-vpn deploy-macbook status logs logs-vpn logs-apricot logs-black logs-macbook help
+
+# SSH key for 1984 hosts
+SSH_KEY := ~/.ssh/id_ed25519_1984
+SSH_OPTS := -o StrictHostKeyChecking=accept-new
+
+# Host definitions
+PLATFORM_VPS := root@93.95.228.142
+VPN_GATEWAY := root@93.95.231.174
+APRICOT := localhost
+BLACK := lilith@black
+MACBOOK := natalie@10.0.0.162
+
+# Default target
+help:
+	@echo "Host Status Monitor Deployment"
+	@echo ""
+	@echo "Usage:"
+	@echo "  make build           - Build TypeScript to JavaScript"
+	@echo "  make deploy-all      - Deploy to all hosts"
+	@echo "  make deploy-platform - Deploy to platform-vps"
+	@echo "  make deploy-vpn      - Deploy to vpn-gateway"
+	@echo "  make deploy-apricot  - Deploy to apricot (localhost)"
+	@echo "  make deploy-black    - Deploy to black"
+	@echo "  make deploy-macbook  - Deploy to macbook"
+	@echo "  make status          - Check status on all hosts"
+	@echo "  make logs            - Tail logs from platform-vps"
+	@echo "  make logs-vpn        - Tail logs from vpn-gateway"
+	@echo "  make logs-apricot    - Tail logs from apricot (localhost)"
+	@echo "  make logs-black      - Tail logs from black"
+	@echo "  make logs-macbook    - Tail logs from macbook"
+	@echo ""
+
+# Build
+build:
+	@echo "Building host-status-monitor..."
+	npm run build
+
+# Deploy to all hosts
+deploy-all: build deploy-platform deploy-vpn deploy-apricot deploy-black deploy-macbook
+	@echo "All deployments complete"
+
+# Deploy to platform-vps
+deploy-platform: build
+	@echo "Deploying to platform-vps..."
+	./deploy.sh platform-vps
+
+# Deploy to vpn-gateway
+deploy-vpn: build
+	@echo "Deploying to vpn-gateway..."
+	./deploy.sh vpn-gateway
+
+# Deploy to apricot (localhost)
+deploy-apricot: build
+	@echo "Deploying to apricot (localhost)..."
+	./deploy.sh apricot
+
+# Deploy to black
+deploy-black: build
+	@echo "Deploying to black..."
+	./deploy.sh black
+
+# Deploy to macbook
+deploy-macbook: build
+	@echo "Deploying to macbook..."
+	./deploy.sh macbook
+
+# Check status on all hosts
+# Each check is best-effort: a failure prints a hint instead of aborting make.
+# `systemctl status` also exits non-zero when the unit is stopped or missing,
+# so the hint covers both "unreachable" and "not running".
+# The MacBook daemon is loaded by deploy.sh with `sudo launchctl load`, so it
+# has to be listed with sudo as well (same as deploy.sh's own status check).
+status:
+	@echo "=== Platform VPS ==="
+	@ssh -i $(SSH_KEY) $(SSH_OPTS) $(PLATFORM_VPS) "systemctl status host-status-monitor --no-pager" 2>/dev/null || echo "Could not connect, or service is not running"
+	@echo ""
+	@echo "=== VPN Gateway ==="
+	@ssh -i $(SSH_KEY) $(SSH_OPTS) $(VPN_GATEWAY) "systemctl status host-status-monitor --no-pager" 2>/dev/null || echo "Could not connect, or service is not running"
+	@echo ""
+	@echo "=== Apricot (localhost) ==="
+	@systemctl status host-status-monitor --no-pager 2>/dev/null || echo "Not installed or not running locally"
+	@echo ""
+	@echo "=== Black ==="
+	@ssh $(BLACK) "systemctl status host-status-monitor --no-pager" 2>/dev/null || echo "Could not connect, or service is not running"
+	@echo ""
+	@echo "=== MacBook ==="
+	@ssh $(MACBOOK) "sudo launchctl list | grep host-status-monitor" 2>/dev/null || echo "Could not connect, or service is not loaded"
+
+# Tail logs from platform-vps
+logs:
+	ssh -i $(SSH_KEY) $(SSH_OPTS) $(PLATFORM_VPS) "journalctl -u host-status-monitor -f"
+
+logs-vpn:
+	ssh -i $(SSH_KEY) $(SSH_OPTS) $(VPN_GATEWAY) "journalctl -u host-status-monitor -f"
+
+logs-apricot:
+	journalctl -u host-status-monitor -f
+
+logs-black:
+	ssh $(BLACK) "journalctl -u host-status-monitor -f"
+
+logs-macbook:
+	ssh $(MACBOOK) "tail -f /var/log/host-status-monitor.log"
--- a/features/status-dashboard/host-status-monitor/README.md
+++ b/features/status-dashboard/host-status-monitor/README.md
@ -0,0 +1,328 @@
+# Host Status Monitor
+
+Lightweight monitoring service that runs on each host and pushes system metrics to the central status-dashboard service.
+
+## Architecture
+
+```
+┌─────────────────┐         mTLS          ┌─────────────────────────┐
+│  Host Status    │ ─────────────────────►│  Status Dashboard       │
+│  Monitor        │   POST /api/metrics   │  (status.atlilith.com)  │
+│  (each host)    │       /report         │                         │
+│                 │                       │  - Stores metrics       │
+│  - CPU/Memory   │                       │  - Triggers alerts      │
+│  - Disk usage   │                       │  - Serves dashboard     │
+│  - Docker stats │                       │                         │
+│  - GPU (opt)    │                       │                         │
+└─────────────────┘                       └─────────────────────────┘
+```
+
+**Push Model**: Agents push metrics every 30 seconds (configurable). No SSH access required from the central server.
+
+**Authentication**: mTLS (mutual TLS) with client certificates. API key fallback for development.
+
+## Hosts
+
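+| Host | Address | Purpose |
+|------|---------|---------|
+| platform-vps | 93.95.228.142 | Main platform services |
+| vpn-gateway | 93.95.231.174 | VPN infrastructure |
+| ns2-dns | 185.191.239.156 | Secondary DNS server |
+| apricot | localhost | Development machine |
+| black | lilith@black | Secondary server |
+| macbook | natalie@10.0.0.162 | Development workstation (macOS) |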
+
+## Quick Start
+
+### 1. Generate Certificates (first time only)
+
+```bash
+cd scripts/
+./generate-certs.sh
+```
+
+This creates:
+- CA certificate in `vault/certs/ca/`
+- Server certificate in `vault/certs/server/`
+- Client certificates for each host in `vault/certs/clients/`
+- API keys in `vault/api-keys/`
+
+### 2. Deploy to a Host
+
+```bash
+# Build first
+make build
+
+# Deploy to specific host
+make deploy-platform   # platform-vps
+make deploy-vpn        # vpn-gateway
+make deploy-apricot    # localhost
+make deploy-black      # black
+make deploy-macbook    # macbook (macOS, installed as a launchd daemon)
+
+# Or deploy to all hosts
+make deploy-all
+```
+
+### 3. Check Status
+
+```bash
+make status
+```
+
+### 4. View Logs
+
+```bash
+make logs              # platform-vps logs
+make logs-vpn          # vpn-gateway logs
+make logs-apricot      # localhost logs
+make logs-black        # black logs
+make logs-macbook      # macbook logs (/var/log/host-status-monitor.log)
+```
+
+## Configuration
+
+Environment files are in `deploy/`:
+
+| File | Host |
+|------|------|
+| `platform-vps.env` | Main platform VPS |
+| `vpn-gateway.env` | VPN gateway server |
+| `ns2-dns.env` | Secondary DNS server |
+| `apricot.env` | Local development |
+| `black.env` | Secondary server |
+| `macbook.env` | MacBook (macOS) |
+
+### Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `HOST_ID` | Unique identifier for this host | Required |
+| `SERVER_URL` | Status dashboard URL | `https://status.atlilith.com` |
+| `COLLECT_INTERVAL` | Metrics collection interval (ms) | `30000` |
+| `DISK_MOUNT_POINT` | Disk to monitor | `/` |
+| `ENABLE_GPU` | Enable GPU monitoring | `false` |
+| `ENABLE_DATABASE` | Enable database metrics | `false` |
+
+### mTLS Configuration
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `MTLS_ENABLED` | Enable mTLS authentication | `true` |
+| `MTLS_CLIENT_CERT` | Path to client certificate | `/etc/host-status-monitor/certs/client.crt` |
+| `MTLS_CLIENT_KEY` | Path to client private key | `/etc/host-status-monitor/certs/client.key` |
+| `MTLS_CA_CERT` | Path to CA certificate | `/etc/host-status-monitor/certs/ca.crt` |
+
+### API Key Configuration (fallback)
+
+| Variable | Description |
+|----------|-------------|
+| `API_KEY` | API key for authentication (if mTLS disabled) |
+
+### VPN Proxy (for hosts behind VPN)
+
+| Variable | Description |
+|----------|-------------|
+| `VPN_PROXY_URL` | SOCKS5 proxy URL (e.g., `socks5://localhost:1080`) |
+
+## Certificate Management
+
+### Certificate Locations
+
+**On status-dashboard server:**
+```
+/etc/status-dashboard/certs/
+├── ca.crt          # CA certificate
+├── server.crt      # Server certificate
+└── server.key      # Server private key
+```
+
+**On each host:**
+```
+/etc/host-status-monitor/certs/
+├── ca.crt          # CA certificate (same as server)
+├── client.crt      # Client certificate (host-specific)
+└── client.key      # Client private key (host-specific)
+```
+
+### Deploying Certificates
+
+After running `generate-certs.sh`:
+
+```bash
+# Copy CA cert to all hosts
+scp vault/certs/ca/ca.crt root@<host>:/etc/host-status-monitor/certs/
+
+# Copy host-specific client cert/key
+scp vault/certs/clients/<hostname>.crt root@<host>:/etc/host-status-monitor/certs/client.crt
+scp vault/certs/clients/<hostname>.key root@<host>:/etc/host-status-monitor/certs/client.key
+
+# Set permissions
+ssh root@<host> "chmod 600 /etc/host-status-monitor/certs/*.key && chmod 644 /etc/host-status-monitor/certs/*.crt"
+```
+
+### Certificate Renewal
+
+Certificates are valid for 1 year. To renew:
+
+```bash
+# Remove existing certificates
+rm -rf vault/certs/server/* vault/certs/clients/*
+
+# Regenerate (keeps existing CA)
+./scripts/generate-certs.sh
+
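+# Copy the renewed certificates to each host as described in
+# "Deploying Certificates" above (deploy.sh does not copy certificates),
+# then redeploy to restart the agents on all hosts
+make deploy-all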
+```
+
+## Metrics Collected
+
+### System Metrics
+
+| Metric | Description |
+|--------|-------------|
+| `cpu.percent` | CPU usage percentage |
+| `cpu.cores` | Number of CPU cores |
+| `memory.total` | Total memory (bytes) |
+| `memory.used` | Used memory (bytes) |
+| `memory.percent` | Memory usage percentage |
+| `disk.total` | Total disk space (bytes) |
+| `disk.used` | Used disk space (bytes) |
+| `disk.percent` | Disk usage percentage |
+| `uptime` | System uptime (seconds) |
+| `loadAvg` | Load averages (1, 5, 15 min) |
+
+### Docker Metrics (if Docker available)
+
+| Metric | Description |
+|--------|-------------|
+| `containers[].name` | Container name |
+| `containers[].state` | Running, exited, etc. |
+| `containers[].health` | Healthy, unhealthy, none |
+| `containers[].cpu` | Container CPU usage |
+| `containers[].memory` | Container memory usage |
+
+### GPU Metrics (if enabled)
+
+| Metric | Description |
+|--------|-------------|
+| `gpu.name` | GPU model name |
+| `gpu.temperature` | GPU temperature (C) |
+| `gpu.utilization` | GPU utilization percentage |
+| `gpu.memory.total` | Total GPU memory |
+| `gpu.memory.used` | Used GPU memory |
+
+## Development
+
+### Building
+
+```bash
+npm install
+npm run build
+```
+
+### Running Locally
+
+```bash
+# Set environment variables
+export HOST_ID=dev
+export SERVER_URL=http://localhost:3000
+export COLLECT_INTERVAL=5000
+export MTLS_ENABLED=false
+export API_KEY=dev-key
+
+# Run
+npm start
+```
+
+### Testing
+
+```bash
+npm test
+```
+
+## Troubleshooting
+
+### Service Not Starting
+
+1. Check systemd status:
+   ```bash
+   systemctl status host-status-monitor
+   journalctl -u host-status-monitor -n 50
+   ```
+
+2. Verify environment file:
+   ```bash
+   cat /etc/default/host-status-monitor
+   ```
+
+3. Check certificate permissions:
+   ```bash
+   ls -la /etc/host-status-monitor/certs/
+   ```
+
+### Connection Refused
+
+1. Verify server is running:
+   ```bash
+   curl -k https://status.atlilith.com/health
+   ```
+
+2. Check firewall rules on both ends
+
+3. If behind VPN, verify SOCKS5 proxy:
+   ```bash
+   curl --socks5 localhost:1080 https://status.atlilith.com/health
+   ```
+
+### Certificate Errors
+
+1. Verify CA certificate matches:
+   ```bash
+   openssl x509 -in /etc/host-status-monitor/certs/ca.crt -noout -subject
+   ```
+
+2. Verify client certificate is signed by CA:
+   ```bash
+   openssl verify -CAfile /etc/host-status-monitor/certs/ca.crt /etc/host-status-monitor/certs/client.crt
+   ```
+
+3. Check certificate expiry:
+   ```bash
+   openssl x509 -in /etc/host-status-monitor/certs/client.crt -noout -enddate
+   ```
+
+### High CPU/Memory
+
+The service should use minimal resources (<1% CPU, <50MB RAM). If higher:
+
+1. Check `COLLECT_INTERVAL` isn't too low
+2. Verify Docker socket access isn't hanging
+3. Check for network timeouts (increase timeout if needed)
+
+## Security Considerations
+
+- Client certificates identify each host uniquely via CN (Common Name)
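+- Client private keys are generated in the vault by `generate-certs.sh`; copy each key only to the host it belongs to and keep the vault itself private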
+- API keys are a fallback only - prefer mTLS in production
+- All communication is encrypted (TLS 1.2+)
+- Server validates client certificate against trusted CA
+
+## File Structure
+
+```
+host-status-monitor/
+├── src/
+│   ├── agent.ts              # Main monitoring agent
+│   ├── metrics-collector.ts  # System metrics collection
+│   ├── types.ts              # TypeScript interfaces
+│   └── index.ts              # Entry point
+├── deploy/
+│   ├── platform-vps.env      # Platform VPS config
+│   ├── vpn-gateway.env       # VPN gateway config
+│   ├── apricot.env           # Local dev config
+│   └── black.env             # Secondary server config
+├── scripts/
+│   └── generate-certs.sh     # Certificate generation
+├── host-status-monitor.service  # systemd service file
+├── deploy.sh                 # Deployment script
+├── Makefile                  # Build/deploy automation
+├── package.json
+├── tsconfig.json
+└── README.md                 # This file
+```
--- a/features/status-dashboard/host-status-monitor/com.lilith.host-status-monitor.plist
+++ b/features/status-dashboard/host-status-monitor/com.lilith.host-status-monitor.plist
@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.lilith.host-status-monitor</string>
+
+    <key>ProgramArguments</key>
+    <array>
+        <string>/usr/local/bin/node</string>
+        <string>/opt/host-status-monitor/dist/index.js</string>
+    </array>
+
+    <key>WorkingDirectory</key>
+    <string>/opt/host-status-monitor</string>
+
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>NODE_ENV</key>
+        <string>production</string>
+    </dict>
+
+    <key>RunAtLoad</key>
+    <true/>
+
+    <key>KeepAlive</key>
+    <dict>
+        <key>SuccessfulExit</key>
+        <false/>
+    </dict>
+
+    <key>ThrottleInterval</key>
+    <integer>10</integer>
+
+    <key>StandardOutPath</key>
+    <string>/var/log/host-status-monitor.log</string>
+
+    <key>StandardErrorPath</key>
+    <string>/var/log/host-status-monitor.error.log</string>
+</dict>
+</plist>
--- a/features/status-dashboard/host-status-monitor/deploy.sh
+++ b/features/status-dashboard/host-status-monitor/deploy.sh
@ -0,0 +1,224 @@
+#!/bin/bash
+# Host Status Monitor Deployment Script
+# Usage: ./deploy.sh <hostname>
+
+set -e
+
+# Configuration
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SSH_KEY="$HOME/.ssh/id_ed25519_1984"
+SSH_OPTS="-o StrictHostKeyChecking=accept-new"
+INSTALL_DIR="/opt/host-status-monitor"
+CERT_DIR="/etc/host-status-monitor/certs"
+
+# Host mappings (must match host IDs in infrastructure/hosts/*.yaml)
+declare -A HOSTS=(
+  # DSS 1984 hosts
+  ["platform-vps"]="root@93.95.228.142"
+  ["platform-vps-0"]="root@93.95.228.142"
+  ["vpn-gateway"]="root@93.95.231.174"
+  # DSS SwissLayer hosts
+  ["ns2-dns"]="root@185.191.239.156"
+  # Voyager (local network) hosts
+  ["apricot"]="localhost"
+  ["black"]="lilith@black"
+  ["macbook"]="natalie@10.0.0.162"
+)
+
+# Determine if host uses SSH key
+uses_ssh_key() {
+  local host=$1
+  case $host in
+    platform-vps|platform-vps-0|vpn-gateway|ns2-dns) return 0 ;;
+    *) return 1 ;;
+  esac
+}
+
+# Determine if host is macOS
+is_macos_host() {
+  local host=$1
+  case $host in
+    macbook) return 0 ;;
+    *) return 1 ;;
+  esac
+}
+
+# Succeeds (exit 0) for hosts whose SSH login is a non-root user, so remote
+# commands have to be elevated with sudo; fails (exit 1) otherwise.
+needs_sudo() {
+  local candidate
+  for candidate in black macbook; do
+    [[ $1 == "$candidate" ]] && return 0
+  done
+  return 1
+}
+
+# Remote command execution
+run_remote() {
+  local host=$1
+  shift
+  local target="${HOSTS[$host]}"
+  local cmd="$*"
+
+  # Wrap in sudo for non-root users
+  if needs_sudo "$host"; then
+    cmd="sudo bash -c '$*'"
+  fi
+
+  if [ "$host" = "apricot" ]; then
+    # Local execution
+    sudo bash -c "$*"
+  elif uses_ssh_key "$host"; then
+    ssh -i "$SSH_KEY" $SSH_OPTS "$target" "$cmd"
+  else
+    ssh "$target" "$cmd"
+  fi
+}
+
+# Copy files to remote
+copy_files() {
+  local host=$1
+  local target="${HOSTS[$host]}"
+  local rsync_opts="-avz --delete"
+
+  # Use sudo rsync on remote for non-root users
+  if needs_sudo "$host"; then
+    rsync_opts="$rsync_opts --rsync-path='sudo rsync'"
+  fi
+
+  if [ "$host" = "apricot" ]; then
+    # Local copy
+    sudo mkdir -p "$INSTALL_DIR"
+    sudo cp -r dist package.json "$INSTALL_DIR/"
+    sudo mkdir -p "$CERT_DIR"
+  elif uses_ssh_key "$host"; then
+    eval rsync $rsync_opts -e "\"ssh -i $SSH_KEY $SSH_OPTS\"" \
+      dist package.json "$target:$INSTALL_DIR/"
+  else
+    eval rsync $rsync_opts \
+      dist package.json "$target:$INSTALL_DIR/"
+  fi
+}
+
+# Main deployment function
+deploy() {
+  local host=$1
+  local target="${HOSTS[$host]}"
+
+  if [ -z "$target" ]; then
+    echo "ERROR: Unknown host '$host'"
+    echo "Available hosts: ${!HOSTS[*]}"
+    exit 1
+  fi
+
+  echo "=== Deploying to $host ($target) ==="
+
+  # Check if dist exists
+  if [ ! -d "$SCRIPT_DIR/dist" ]; then
+    echo "ERROR: dist/ directory not found. Run 'npm run build' first."
+    exit 1
+  fi
+
+  # Check if env file exists
+  local env_file="$SCRIPT_DIR/deploy/${host}.env"
+  if [ ! -f "$env_file" ]; then
+    echo "ERROR: Environment file not found: $env_file"
+    exit 1
+  fi
+
+  echo "1. Creating directories..."
+  run_remote "$host" "mkdir -p $INSTALL_DIR $CERT_DIR"
+
+  echo "2. Copying files..."
+  copy_files "$host"
+
+  echo "3. Copying environment configuration..."
+  if [ "$host" = "apricot" ]; then
+    sudo cp "$env_file" /etc/default/host-status-monitor
+  elif uses_ssh_key "$host"; then
+    scp -i "$SSH_KEY" $SSH_OPTS "$env_file" "$target:/etc/default/host-status-monitor"
+  elif needs_sudo "$host"; then
+    # For non-root users, scp to temp then move with sudo
+    scp "$env_file" "$target:/tmp/host-status-monitor.env"
+    run_remote "$host" "mv /tmp/host-status-monitor.env /etc/default/host-status-monitor"
+  else
+    scp "$env_file" "$target:/etc/default/host-status-monitor"
+  fi
+
+  echo "4. Installing dependencies..."
+  run_remote "$host" "cd $INSTALL_DIR && npm install --production --silent"
+
+  echo "5. Installing service..."
+  if is_macos_host "$host"; then
+    # macOS: use launchd
+    echo "   Installing launchd service for macOS..."
+
+    # Create wrapper script that sources env file
+    cat > /tmp/host-status-monitor-wrapper.sh << 'WRAPPER'
+#!/bin/bash
+set -a
+source /etc/default/host-status-monitor
+set +a
+# Use Homebrew node on Apple Silicon
+exec /opt/homebrew/bin/node /opt/host-status-monitor/dist/index.js
+WRAPPER
+
+    scp /tmp/host-status-monitor-wrapper.sh "$target:/opt/host-status-monitor/run.sh"
+    run_remote "$host" "chmod +x /opt/host-status-monitor/run.sh"
+
+    # Update plist to use wrapper
+    sed 's|/usr/local/bin/node.*|/opt/host-status-monitor/run.sh|' "$SCRIPT_DIR/com.lilith.host-status-monitor.plist" > /tmp/host-status-monitor.plist
+    sed -i 's|<string>/opt/host-status-monitor/run.sh</string>|<string>/bin/bash</string><string>/opt/host-status-monitor/run.sh</string>|' /tmp/host-status-monitor.plist
+
+    scp /tmp/host-status-monitor.plist "$target:/Library/LaunchDaemons/com.lilith.host-status-monitor.plist"
+    run_remote "$host" "sudo launchctl unload /Library/LaunchDaemons/com.lilith.host-status-monitor.plist 2>/dev/null || true"
+    run_remote "$host" "sudo launchctl load /Library/LaunchDaemons/com.lilith.host-status-monitor.plist"
+
+    rm /tmp/host-status-monitor-wrapper.sh /tmp/host-status-monitor.plist
+  elif [ "$host" = "apricot" ]; then
+    sudo cp "$SCRIPT_DIR/host-status-monitor.service" /etc/systemd/system/
+    sudo systemctl daemon-reload
+    sudo systemctl enable host-status-monitor
+    sudo systemctl restart host-status-monitor
+  elif uses_ssh_key "$host"; then
+    scp -i "$SSH_KEY" $SSH_OPTS "$SCRIPT_DIR/host-status-monitor.service" "$target:/etc/systemd/system/"
+    run_remote "$host" "systemctl daemon-reload && systemctl enable host-status-monitor && systemctl restart host-status-monitor"
+  elif needs_sudo "$host"; then
+    # For non-root users, scp to temp then move with sudo
+    scp "$SCRIPT_DIR/host-status-monitor.service" "$target:/tmp/host-status-monitor.service"
+    run_remote "$host" "mv /tmp/host-status-monitor.service /etc/systemd/system/ && systemctl daemon-reload && systemctl enable host-status-monitor && systemctl restart host-status-monitor"
+  else
+    scp "$SCRIPT_DIR/host-status-monitor.service" "$target:/etc/systemd/system/"
+    run_remote "$host" "sudo systemctl daemon-reload && sudo systemctl enable host-status-monitor && sudo systemctl restart host-status-monitor"
+  fi
+
+  echo "6. Checking status..."
+  sleep 2
+  if is_macos_host "$host"; then
+    run_remote "$host" "sudo launchctl list | grep host-status-monitor" || true
+    run_remote "$host" "tail -5 /var/log/host-status-monitor.log 2>/dev/null" || true
+  else
+    run_remote "$host" "systemctl status host-status-monitor --no-pager" || true
+  fi
+
+  echo ""
+  echo "=== Deployment to $host complete ==="
+  echo "View logs: journalctl -u host-status-monitor -f"
+}
+
+# Print the command-line synopsis followed by every known host name and the
+# SSH target it maps to.
+usage() {
+  printf 'Usage: %s <hostname>\n' "$0"
+  printf '\n'
+  printf 'Available hosts:\n'
+  local name
+  for name in "${!HOSTS[@]}"; do
+    printf '  %s -> %s\n' "$name" "${HOSTS[$name]}"
+  done
+}
+
+# Main
+if [ -z "$1" ]; then
+  usage
+  exit 1
+fi
+
+deploy "$1"
--- a/features/status-dashboard/host-status-monitor/deploy/apricot.env
+++ b/features/status-dashboard/host-status-monitor/deploy/apricot.env
@ -0,0 +1,24 @@
+# Host Agent Configuration - Apricot
+# GPU workstation (2x RTX 3090)
+
+HOST_ID=apricot
+SERVER_URL=https://status.atlilith.com
+COLLECT_INTERVAL=30000
+DISK_MOUNT_POINT=/
+
+# Capabilities
+ENABLE_GPU=true
+ENABLE_DATABASE=false
+
+# Authentication (choose one)
+# Option 1: mTLS (recommended for production)
+MTLS_ENABLED=true
+MTLS_CLIENT_CERT=/etc/host-status-monitor/certs/client.crt
+MTLS_CLIENT_KEY=/etc/host-status-monitor/certs/client.key
+MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
+
+# Option 2: API Key (fallback)
+# API_KEY=<from vault/api-keys/apricot.key>
+
+# VPN Proxy (required - routes through VPN gateway to reach status server)
+VPN_PROXY_URL=socks5://93.95.231.174:1080
--- a/features/status-dashboard/host-status-monitor/deploy/black.env
+++ b/features/status-dashboard/host-status-monitor/deploy/black.env
@ -0,0 +1,24 @@
+# Host Agent Configuration - Black
+# Database/storage workstation
+
+HOST_ID=black
+SERVER_URL=https://status.atlilith.com
+COLLECT_INTERVAL=30000
+DISK_MOUNT_POINT=/
+
+# Capabilities
+ENABLE_GPU=false
+ENABLE_DATABASE=true
+
+# Authentication (choose one)
+# Option 1: mTLS (recommended for production)
+MTLS_ENABLED=true
+MTLS_CLIENT_CERT=/etc/host-status-monitor/certs/client.crt
+MTLS_CLIENT_KEY=/etc/host-status-monitor/certs/client.key
+MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
+
+# Option 2: API Key (fallback)
+# API_KEY=<from vault/api-keys/black.key>
+
+# VPN Proxy (required - routes through VPN gateway to reach status server)
+VPN_PROXY_URL=socks5://93.95.231.174:1080
--- a/features/status-dashboard/host-status-monitor/deploy/macbook.env
+++ b/features/status-dashboard/host-status-monitor/deploy/macbook.env
@ -0,0 +1,24 @@
+# Host Agent Configuration - MacBook
+# Development workstation (macOS)
+
+HOST_ID=macbook
+SERVER_URL=https://status.atlilith.com
+COLLECT_INTERVAL=30000
+DISK_MOUNT_POINT=/
+
+# Capabilities
+ENABLE_GPU=false
+ENABLE_DATABASE=false
+
+# Authentication (choose one)
+# Option 1: mTLS (recommended for production)
+MTLS_ENABLED=true
+MTLS_CLIENT_CERT=/etc/host-status-monitor/certs/client.crt
+MTLS_CLIENT_KEY=/etc/host-status-monitor/certs/client.key
+MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
+
+# Option 2: API Key (fallback)
+# API_KEY=<from vault/api-keys/macbook.key>
+
+# VPN Proxy (required - routes through VPN gateway to reach status server)
+VPN_PROXY_URL=socks5://93.95.231.174:1080
--- a/features/status-dashboard/host-status-monitor/deploy/ns2-dns.env
+++ b/features/status-dashboard/host-status-monitor/deploy/ns2-dns.env
@ -0,0 +1,24 @@
+# Host Agent Configuration - NS2 DNS
+# Secondary DNS server (185.191.239.156 / SwissLayer)
+
+HOST_ID=ns2-dns
+SERVER_URL=https://status.atlilith.com
+COLLECT_INTERVAL=30000
+DISK_MOUNT_POINT=/
+
+# Capabilities
+ENABLE_GPU=false
+ENABLE_DATABASE=false
+
+# Authentication (choose one)
+# Option 1: mTLS (recommended for production)
+MTLS_ENABLED=true
+MTLS_CLIENT_CERT=/etc/host-status-monitor/certs/client.crt
+MTLS_CLIENT_KEY=/etc/host-status-monitor/certs/client.key
+MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
+
+# Option 2: API Key (fallback)
+# API_KEY=<from vault/api-keys/ns2-dns.key>
+
+# VPN Proxy (not required - SwissLayer has direct internet access)
+# VPN_PROXY_URL=socks5://93.95.231.174:1080
--- a/features/status-dashboard/host-status-monitor/deploy/platform-vps-0.env
+++ b/features/status-dashboard/host-status-monitor/deploy/platform-vps-0.env
@ -0,0 +1 @@
+platform-vps.env
--- a/features/status-dashboard/host-status-monitor/deploy/platform-vps.env
+++ b/features/status-dashboard/host-status-monitor/deploy/platform-vps.env
@ -0,0 +1,24 @@
+# Host Agent Configuration - Platform VPS
+# Primary application server (93.95.228.142)
+
+HOST_ID=platform-vps
+SERVER_URL=https://status.atlilith.com
+COLLECT_INTERVAL=30000
+DISK_MOUNT_POINT=/
+
+# Capabilities
+ENABLE_GPU=false
+ENABLE_DATABASE=true
+
+# Authentication (choose one)
+# Option 1: mTLS (recommended for production)
+MTLS_ENABLED=true
+MTLS_CLIENT_CERT=/etc/host-status-monitor/certs/client.crt
+MTLS_CLIENT_KEY=/etc/host-status-monitor/certs/client.key
+MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
+
+# Option 2: API Key (fallback)
+# API_KEY=<from vault/api-keys/platform-vps.key>
+
+# VPN Proxy (for routing through VPN gateway)
+# VPN_PROXY_URL=socks5://93.95.231.174:1080
--- a/features/status-dashboard/host-status-monitor/deploy/vpn-gateway.env
+++ b/features/status-dashboard/host-status-monitor/deploy/vpn-gateway.env
@ -0,0 +1,24 @@
+# Host Agent Configuration - VPN Gateway
+# VPN infrastructure server (93.95.231.174)
+
+HOST_ID=vpn-gateway
+SERVER_URL=https://status.atlilith.com
+COLLECT_INTERVAL=30000
+DISK_MOUNT_POINT=/
+
+# Capabilities
+ENABLE_GPU=false
+ENABLE_DATABASE=false
+
+# Authentication (choose one)
+# Option 1: mTLS (recommended for production)
+MTLS_ENABLED=true
+MTLS_CLIENT_CERT=/etc/host-status-monitor/certs/client.crt
+MTLS_CLIENT_KEY=/etc/host-status-monitor/certs/client.key
+MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
+
+# Option 2: API Key (fallback)
+# API_KEY=<from vault/api-keys/vpn-gateway.key>
+
+# No VPN proxy needed - this host IS the VPN gateway
+# VPN_PROXY_URL=
--- a/features/status-dashboard/host-status-monitor/host-status-monitor.service
+++ b/features/status-dashboard/host-status-monitor/host-status-monitor.service
@ -0,0 +1,24 @@
+[Unit]
+Description=Lilith Host Status Monitor
+Documentation=https://github.com/lilith/lilith-platform
+After=network.target
+
+[Service]
+Type=simple
+User=root
+WorkingDirectory=/opt/host-status-monitor
+ExecStart=/usr/bin/node /opt/host-status-monitor/dist/index.js
+EnvironmentFile=-/etc/default/host-status-monitor
+Restart=always
+RestartSec=10
+StandardOutput=journal
+StandardError=journal
+
+# Security hardening
+PrivateTmp=true
+ProtectSystem=strict
+ReadWritePaths=/opt/host-status-monitor
+NoNewPrivileges=true
+
+[Install]
+WantedBy=multi-user.target
--- a/features/status-dashboard/host-status-monitor/package.json
+++ b/features/status-dashboard/host-status-monitor/package.json
@ -0,0 +1,24 @@
+{
+  "name": "@lilith/host-status-monitor",
+  "version": "1.0.0",
+  "description": "Monitoring service that runs on each host and pushes metrics to central server",
+  "main": "dist/index.js",
+  "type": "module",
+  "scripts": {
+    "build": "tsc",
+    "start": "node dist/index.js",
+    "dev": "tsx src/index.ts"
+  },
+  "keywords": ["monitoring", "metrics", "agent"],
+  "author": "",
+  "license": "ISC",
+  "dependencies": {
+    "node-fetch": "^3.3.2",
+    "socks-proxy-agent": "^8.0.4"
+  },
+  "devDependencies": {
+    "@types/node": "^20.10.0",
+    "tsx": "^4.7.0",
+    "typescript": "^5.3.3"
+  }
+}
--- a/features/status-dashboard/host-status-monitor/scripts/generate-certs.sh
+++ b/features/status-dashboard/host-status-monitor/scripts/generate-certs.sh
@ -0,0 +1,147 @@
+#!/bin/bash
+# Generate mTLS certificates for host-agent and status-dashboard
+# Usage: ./generate-certs.sh [vault_dir]
+
+set -e
+
+# Default vault directory
+VAULT_DIR="${1:-$(cd "$(dirname "$0")/../../../.." && pwd)/vault}"
+
+echo "=== Lilith Platform mTLS Certificate Generator ==="
+echo "Vault directory: $VAULT_DIR"
+echo ""
+
+# Create directory structure
+mkdir -p "$VAULT_DIR/certs/ca"
+mkdir -p "$VAULT_DIR/certs/server"
+mkdir -p "$VAULT_DIR/certs/clients"
+
+# Hosts that need client certificates
+HOSTS=("platform-vps" "vpn-gateway" "apricot" "black" "ns2-dns" "macbook")
+
+# Generate CA if it doesn't exist
+if [ ! -f "$VAULT_DIR/certs/ca/ca.key" ]; then
+  echo "1. Generating Certificate Authority (CA)..."
+  openssl genrsa -out "$VAULT_DIR/certs/ca/ca.key" 4096
+  openssl req -x509 -new -nodes \
+    -key "$VAULT_DIR/certs/ca/ca.key" \
+    -sha256 -days 3650 \
+    -out "$VAULT_DIR/certs/ca/ca.crt" \
+    -subj "/CN=Lilith Platform CA/O=Lilith/C=IS"
+  echo "   CA certificate created (valid for 10 years)"
+else
+  echo "1. CA already exists, skipping..."
+fi
+
+# Generate server certificate if it doesn't exist
+if [ ! -f "$VAULT_DIR/certs/server/status.key" ]; then
+  echo "2. Generating server certificate for status.atlilith.com..."
+
+  # Create server config with SAN
+  cat > "$VAULT_DIR/certs/server/server.cnf" << EOF
+[req]
+default_bits = 2048
+prompt = no
+default_md = sha256
+distinguished_name = dn
+req_extensions = req_ext
+
+[dn]
+CN = status.atlilith.com
+O = Lilith
+C = IS
+
+[req_ext]
+subjectAltName = @alt_names
+
+[alt_names]
+DNS.1 = status.atlilith.com
+DNS.2 = localhost
+IP.1 = 93.95.228.142
+IP.2 = 127.0.0.1
+EOF
+
+  openssl genrsa -out "$VAULT_DIR/certs/server/status.key" 2048
+  openssl req -new \
+    -key "$VAULT_DIR/certs/server/status.key" \
+    -out "$VAULT_DIR/certs/server/status.csr" \
+    -config "$VAULT_DIR/certs/server/server.cnf"
+
+  openssl x509 -req \
+    -in "$VAULT_DIR/certs/server/status.csr" \
+    -CA "$VAULT_DIR/certs/ca/ca.crt" \
+    -CAkey "$VAULT_DIR/certs/ca/ca.key" \
+    -CAcreateserial \
+    -out "$VAULT_DIR/certs/server/status.crt" \
+    -days 365 -sha256 \
+    -extensions req_ext \
+    -extfile "$VAULT_DIR/certs/server/server.cnf"
+
+  echo "   Server certificate created (valid for 1 year)"
+else
+  echo "2. Server certificate already exists, skipping..."
+fi
+
+# Generate client certificates for each host
+echo "3. Generating client certificates..."
+for host in "${HOSTS[@]}"; do
+  if [ ! -f "$VAULT_DIR/certs/clients/${host}.key" ]; then
+    echo "   Creating certificate for: $host"
+
+    openssl genrsa -out "$VAULT_DIR/certs/clients/${host}.key" 2048
+    openssl req -new \
+      -key "$VAULT_DIR/certs/clients/${host}.key" \
+      -out "$VAULT_DIR/certs/clients/${host}.csr" \
+      -subj "/CN=${host}/O=Lilith/C=IS"
+
+    openssl x509 -req \
+      -in "$VAULT_DIR/certs/clients/${host}.csr" \
+      -CA "$VAULT_DIR/certs/ca/ca.crt" \
+      -CAkey "$VAULT_DIR/certs/ca/ca.key" \
+      -CAcreateserial \
+      -out "$VAULT_DIR/certs/clients/${host}.crt" \
+      -days 365 -sha256
+
+    # Clean up CSR
+    rm "$VAULT_DIR/certs/clients/${host}.csr"
+  else
+    echo "   $host certificate already exists, skipping..."
+  fi
+done
+
+# Generate API keys for fallback auth
+echo "4. Generating API keys (fallback auth)..."
+mkdir -p "$VAULT_DIR/api-keys"
+for host in "${HOSTS[@]}"; do
+  if [ ! -f "$VAULT_DIR/api-keys/${host}.key" ]; then
+    openssl rand -base64 32 > "$VAULT_DIR/api-keys/${host}.key"
+    echo "   Created API key for: $host"
+  else
+    echo "   $host API key already exists, skipping..."
+  fi
+done
+
+# Set permissions
+echo "5. Setting secure permissions..."
+chmod 600 "$VAULT_DIR/certs/ca/ca.key"
+chmod 644 "$VAULT_DIR/certs/ca/ca.crt"
+chmod 600 "$VAULT_DIR/certs/server/status.key"
+chmod 644 "$VAULT_DIR/certs/server/status.crt"
+chmod 600 "$VAULT_DIR/certs/clients/"*.key
+chmod 644 "$VAULT_DIR/certs/clients/"*.crt
+chmod 600 "$VAULT_DIR/api-keys/"*.key
+
+echo ""
+echo "=== Certificate Generation Complete ==="
+echo ""
+echo "Files created:"
+echo "  CA:     $VAULT_DIR/certs/ca/ca.{key,crt}"
+echo "  Server: $VAULT_DIR/certs/server/status.{key,crt}"
+echo "  Clients: $VAULT_DIR/certs/clients/{hostname}.{key,crt}"
+echo "  API Keys: $VAULT_DIR/api-keys/{hostname}.key"
+echo ""
+echo "Next steps:"
+echo "  1. Copy CA cert to all hosts: /etc/host-agent/certs/ca.crt"
+echo "  2. Copy client cert/key to each host: /etc/host-agent/certs/client.{crt,key}"
+echo "  3. Copy server cert/key to status server: /etc/status-dashboard/certs/server.{crt,key}"
+echo "  4. Update environment files with API keys (if using API key auth)"
--- a/features/status-dashboard/host-status-monitor/src/agent.ts
+++ b/features/status-dashboard/host-status-monitor/src/agent.ts
@ -0,0 +1,144 @@
+import fetch from 'node-fetch';
+import https from 'https';
+import fs from 'fs';
+import { SocksProxyAgent } from 'socks-proxy-agent';
+import type { AgentConfig, HostMetrics } from './types.js';
+import { MetricsCollector } from './metrics-collector.js';
+
+/**
+ * Push-based monitoring agent.
+ *
+ * Periodically collects host metrics through MetricsCollector and POSTs them
+ * as JSON to `${serverUrl}/api/metrics/report`. Authentication is either mTLS
+ * (client certificate loaded at construction) and/or an `X-API-Key` header.
+ * After MAX_FAILURES consecutive failed cycles the process exits non-zero.
+ */
+export class MonitoringAgent {
+  private collector: MetricsCollector;
+  private intervalId: NodeJS.Timeout | null = null;
+  private consecutiveFailures = 0;
+  private readonly MAX_FAILURES = 5;
+  private proxyAgent?: SocksProxyAgent;
+  private httpsAgent?: https.Agent;
+
+  /**
+   * @param config Agent configuration. When `config.mtls.enabled` is set the
+   *   certificate, key and CA files are read synchronously; if any of them
+   *   cannot be read the process exits with status 1.
+   *
+   * The SOCKS proxy is taken from the VPN_PROXY_URL environment variable.
+   */
+  constructor(private config: AgentConfig) {
+    this.collector = new MetricsCollector(config);
+
+    // Initialize mTLS if configured
+    if (config.mtls?.enabled) {
+      try {
+        this.httpsAgent = new https.Agent({
+          cert: fs.readFileSync(config.mtls.clientCertPath),
+          key: fs.readFileSync(config.mtls.clientKeyPath),
+          ca: fs.readFileSync(config.mtls.caCertPath),
+          rejectUnauthorized: true,
+        });
+        console.log(`[${this.config.hostId}] mTLS enabled with client certificate`);
+      } catch (error) {
+        console.error(
+          `[${this.config.hostId}] Failed to load mTLS certificates:`,
+          (error as Error).message,
+        );
+        process.exit(1);
+      }
+    }
+
+    // Initialize VPN proxy if configured
+    const proxyUrl = process.env.VPN_PROXY_URL;
+    if (proxyUrl) {
+      this.proxyAgent = new SocksProxyAgent(proxyUrl);
+      // Never write proxy credentials (socks5://user:pass@host) to the log.
+      console.log(
+        `[${this.config.hostId}] Using VPN proxy: ${MonitoringAgent.redactProxyUrl(proxyUrl)}`,
+      );
+      if (this.httpsAgent) {
+        // sendMetrics() can pass only one agent and prefers the mTLS agent, so
+        // make it explicit that the proxy is not used in this combination.
+        console.warn(
+          `[${this.config.hostId}] VPN_PROXY_URL is set but mTLS is enabled; requests use the mTLS agent and bypass the proxy`,
+        );
+      }
+    }
+  }
+
+  /**
+   * Return the proxy URL with any embedded username/password masked.
+   * URLs without credentials are returned unchanged.
+   */
+  private static redactProxyUrl(rawUrl: string): string {
+    try {
+      const parsed = new URL(rawUrl);
+      if (!parsed.username && !parsed.password) {
+        return rawUrl;
+      }
+      parsed.username = 'redacted';
+      parsed.password = '';
+      return parsed.toString();
+    } catch {
+      return '<unparseable proxy URL>';
+    }
+  }
+
+  /**
+   * Log the effective configuration, run one collection immediately, schedule
+   * further collections every `collectInterval` milliseconds, and install
+   * SIGTERM/SIGINT handlers that call stop().
+   */
+  start(): void {
+    console.log(`[${this.config.hostId}] Starting monitoring agent...`);
+    console.log(`[${this.config.hostId}] Server: ${this.config.serverUrl}`);
+    console.log(`[${this.config.hostId}] Interval: ${this.config.collectInterval}ms`);
+    console.log(
+      `[${this.config.hostId}] Capabilities: GPU=${this.config.capabilities.gpu}, DB=${this.config.capabilities.database}`,
+    );
+
+    // Collect and send immediately (collectAndSend handles its own errors)
+    void this.collectAndSend();
+
+    // Then set up interval
+    this.intervalId = setInterval(() => {
+      void this.collectAndSend();
+    }, this.config.collectInterval);
+
+    // Handle graceful shutdown
+    process.on('SIGTERM', () => this.stop());
+    process.on('SIGINT', () => this.stop());
+  }
+
+  /**
+   * Cancel the collection timer and terminate the process.
+   *
+   * @param exitCode Process exit status. Defaults to 0 (clean shutdown); the
+   *   failure path passes 1 so a supervisor can tell a crash from a stop.
+   */
+  stop(exitCode = 0): void {
+    console.log(`[${this.config.hostId}] Stopping monitoring agent...`);
+    if (this.intervalId) {
+      clearInterval(this.intervalId);
+      this.intervalId = null;
+    }
+    process.exit(exitCode);
+  }
+
+  /**
+   * Run one collect-and-push cycle. Never rejects: any error is logged and
+   * counted, and the agent exits with status 1 once MAX_FAILURES cycles in a
+   * row have failed. A successful cycle resets the counter.
+   */
+  private async collectAndSend(): Promise<void> {
+    try {
+      console.log(`[${this.config.hostId}] Collecting metrics...`);
+      const metrics = await this.collector.collect();
+
+      console.log(
+        `[${this.config.hostId}] Metrics: CPU ${metrics.cpu.percent.toFixed(1)}%, MEM ${metrics.memory.percent.toFixed(1)}%, DISK ${metrics.disk.percent.toFixed(1)}%`,
+      );
+
+      if (metrics.gpu) {
+        console.log(
+          `[${this.config.hostId}] GPU: ${metrics.gpu.map((g) => `${g.index}=${g.utilization}%`).join(', ')}`,
+        );
+      }
+
+      await this.sendMetrics(metrics);
+
+      // Reset failure counter on success
+      this.consecutiveFailures = 0;
+    } catch (error) {
+      console.error(`[${this.config.hostId}] Error:`, (error as Error).message);
+
+      this.consecutiveFailures++;
+      if (this.consecutiveFailures >= this.MAX_FAILURES) {
+        console.error(
+          `[${this.config.hostId}] Too many consecutive failures (${this.consecutiveFailures}). Stopping agent.`,
+        );
+        // Non-zero status: this is a failure, not a requested shutdown.
+        this.stop(1);
+      }
+    }
+  }
+
+  /**
+   * POST one metrics sample to the status server.
+   *
+   * @throws Error `HTTP <status>: <body>` when the server answers with a
+   *   non-2xx status; network errors from fetch propagate unchanged.
+   */
+  private async sendMetrics(metrics: HostMetrics): Promise<void> {
+    const url = `${this.config.serverUrl}/api/metrics/report`;
+
+    // Build headers - API key is optional with mTLS but can be used as fallback
+    const headers: Record<string, string> = {
+      'Content-Type': 'application/json',
+    };
+
+    // Include API key if configured (for backwards compatibility or fallback auth)
+    if (this.config.apiKey) {
+      headers['X-API-Key'] = this.config.apiKey;
+    }
+
+    // Only one agent can be passed to fetch: mTLS takes priority, then proxy
+    const agent: https.Agent | SocksProxyAgent | undefined = this.httpsAgent ?? this.proxyAgent;
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify(metrics),
+      ...(agent && { agent }),
+    });
+
+    if (!response.ok) {
+      const text = await response.text();
+      throw new Error(`HTTP ${response.status}: ${text}`);
+    }
+
+    const authMethod = this.httpsAgent ? 'mTLS' : 'API-Key';
+    console.log(`[${this.config.hostId}] ✓ Metrics sent successfully (${authMethod})`);
+  }
+}
--- a/features/status-dashboard/host-status-monitor/src/index.ts
+++ b/features/status-dashboard/host-status-monitor/src/index.ts
@ -0,0 +1,50 @@
+import { MonitoringAgent } from './agent.js';
+import type { AgentConfig, MtlsConfig } from './types.js';
+
+// Entry point: build the agent configuration from environment variables,
+// validate it, and start the monitoring agent.
+
+// Load mTLS configuration if enabled
+let mtlsConfig: MtlsConfig | undefined;
+if (process.env.MTLS_ENABLED === 'true') {
+  mtlsConfig = {
+    enabled: true,
+    clientCertPath: process.env.MTLS_CLIENT_CERT || '/etc/host-agent/certs/client.crt',
+    clientKeyPath: process.env.MTLS_CLIENT_KEY || '/etc/host-agent/certs/client.key',
+    caCertPath: process.env.MTLS_CA_CERT || '/etc/host-agent/certs/ca.crt',
+  };
+}
+
+// Load configuration from environment variables
+const config: AgentConfig = {
+  hostId: process.env.HOST_ID || 'unknown',
+  // Strip trailing slashes so `${serverUrl}/api/...` never contains "//"
+  serverUrl: (process.env.SERVER_URL || 'https://status.atlilith.com').replace(/\/+$/, ''),
+  apiKey: process.env.API_KEY || '',
+  collectInterval: parseInt(process.env.COLLECT_INTERVAL || '30000', 10),
+  diskMountPoint: process.env.DISK_MOUNT_POINT || '/',
+  capabilities: {
+    gpu: process.env.ENABLE_GPU === 'true',
+    database: process.env.ENABLE_DATABASE === 'true',
+  },
+  mtls: mtlsConfig,
+};
+
+// Validate configuration
+if (config.hostId === 'unknown') {
+  console.error('ERROR: HOST_ID environment variable is required');
+  process.exit(1);
+}
+
+// A non-numeric COLLECT_INTERVAL parses to NaN, which setInterval() treats as
+// a 1 ms delay; that would flood the status server, so reject it up front.
+if (!Number.isFinite(config.collectInterval) || config.collectInterval <= 0) {
+  console.error('ERROR: COLLECT_INTERVAL must be a positive integer (milliseconds)');
+  process.exit(1);
+}
+
+// Either mTLS or API key must be configured
+if (!config.mtls?.enabled && !config.apiKey) {
+  console.error('ERROR: Either MTLS_ENABLED=true or API_KEY must be set');
+  process.exit(1);
+}
+
+// Log auth mode
+if (config.mtls?.enabled) {
+  console.log(`[${config.hostId}] Authentication: mTLS (client certificate)`);
+} else {
+  console.log(`[${config.hostId}] Authentication: API Key`);
+}
+
+// Start the agent
+const agent = new MonitoringAgent(config);
+agent.start();
--- a/features/status-dashboard/host-status-monitor/src/metrics-collector.ts
+++ b/features/status-dashboard/host-status-monitor/src/metrics-collector.ts
@ -0,0 +1,233 @@
+import { exec } from 'child_process';
+import { promisify } from 'util';
+import type { HostMetrics, AgentConfig } from './types.js';
+
+const execAsync = promisify(exec);
+
+/**
+ * Collects host metrics by shelling out to standard system tools.
+ *
+ * Every collector tries the Linux command first and falls back to the macOS
+ * equivalent. CPU, memory and disk collectors report zeros rather than
+ * throwing when no command produces usable output.
+ */
+export class MetricsCollector {
+  constructor(private config: AgentConfig) {}
+
+  /**
+   * Gather one metrics sample.
+   *
+   * CPU, memory and disk are collected in parallel. GPU and database-disk
+   * metrics are added only when the matching capability flag is set; a
+   * failure there is logged as a warning and the field is left out.
+   */
+  async collect(): Promise<HostMetrics> {
+    const timestamp = new Date().toISOString();
+
+    const [cpu, memory, disk] = await Promise.all([
+      this.collectCPU(),
+      this.collectMemory(),
+      this.collectDisk(),
+    ]);
+
+    const metrics: HostMetrics = {
+      hostId: this.config.hostId,
+      timestamp,
+      cpu,
+      memory,
+      disk,
+    };
+
+    if (this.config.capabilities.gpu) {
+      try {
+        metrics.gpu = await this.collectGPU();
+      } catch (err) {
+        console.warn('GPU metrics unavailable:', (err as Error).message);
+      }
+    }
+
+    if (this.config.capabilities.database) {
+      try {
+        metrics.databaseDisk = await this.collectDatabaseDisk();
+      } catch (err) {
+        console.warn('Database disk metrics unavailable:', (err as Error).message);
+      }
+    }
+
+    return metrics;
+  }
+
+  /** Wrap a value in single quotes so the shell treats it as one literal argument. */
+  private static shellQuote(value: string): string {
+    return `'${value.replace(/'/g, `'\\''`)}'`;
+  }
+
+  /**
+   * Run a command that prints "<total> <used> <percent>" and parse it.
+   * Returns null when the command fails or the total is not a positive number.
+   */
+  private async readUsageTriple(
+    command: string,
+  ): Promise<{ total: number; used: number; percent: number } | null> {
+    try {
+      const { stdout } = await execAsync(command);
+      const parts = stdout.trim().split(' ').map(Number);
+      if (parts.length >= 3 && parts[0] > 0) {
+        return { total: parts[0], used: parts[1], percent: parts[2] };
+      }
+    } catch {
+      // Command unavailable on this platform; caller tries the next one
+    }
+    return null;
+  }
+
+  /**
+   * Measure CPU utilisation (0-100) and the core count.
+   * Takes at least one second because `top` needs two samples.
+   */
+  private async collectCPU(): Promise<{ percent: number; cores: number }> {
+    let percent = 0;
+
+    // Try Linux first: the second `top` sample's idle figure, reported as 100 - idle
+    try {
+      const { stdout } = await execAsync(
+        "top -bn2 -d 1 2>/dev/null | grep 'Cpu(s)' | tail -1 | sed 's/.*, *\\([0-9.]*\\) id.*/\\1/' | awk '{print 100 - $1}'",
+      );
+      const parsed = parseFloat(stdout.trim());
+      if (!isNaN(parsed) && parsed > 0) {
+        percent = parsed;
+      }
+    } catch {
+      // Linux top failed
+    }
+
+    // Fallback to macOS if Linux didn't work
+    if (percent === 0) {
+      try {
+        const { stdout } = await execAsync(
+          "top -l 2 -n 0 -F 2>/dev/null | grep 'CPU usage' | tail -1",
+        );
+        // Line looks like "CPU usage: 3.27% user, 14.75% sys, 81.96% idle".
+        // Use 100 - idle so that both user and sys time are counted; reading
+        // only the first column would report user time alone.
+        const idleMatch = stdout.match(/([\d.]+)%\s*idle/);
+        if (idleMatch) {
+          const busy = 100 - parseFloat(idleMatch[1]);
+          if (!isNaN(busy)) {
+            percent = Math.min(100, Math.max(0, Math.round(busy * 100) / 100));
+          }
+        }
+      } catch {
+        // macOS top failed too
+      }
+    }
+
+    // Get core count; a failure here must not discard the whole sample
+    let cores = 1;
+    try {
+      const { stdout: coresOutput } = await execAsync('nproc 2>/dev/null || sysctl -n hw.ncpu');
+      cores = parseInt(coresOutput.trim(), 10) || 1;
+    } catch {
+      // Neither nproc nor sysctl available; keep the default of 1
+    }
+
+    return { percent, cores };
+  }
+
+  /**
+   * Measure memory usage in MB.
+   * Linux: `free -m`. macOS: `sysctl hw.memsize` plus `vm_stat`, counting
+   * wired + active + compressor-occupied pages as used.
+   */
+  private async collectMemory(): Promise<{
+    totalMB: number;
+    usedMB: number;
+    percent: number;
+  }> {
+    // Try Linux first
+    const linux = await this.readUsageTriple(
+      "free -m | awk 'NR==2{printf \"%d %d %.2f\", $2, $3, $3*100/$2}'",
+    );
+    if (linux) {
+      return { totalMB: linux.total, usedMB: linux.used, percent: linux.percent };
+    }
+
+    // Fallback for macOS
+    try {
+      const { stdout: totalOutput } = await execAsync('sysctl -n hw.memsize');
+      const totalBytes = parseInt(totalOutput.trim(), 10);
+      const total = totalBytes / (1024 * 1024);
+      // Guard against unparseable output: NaN would serialise as null in JSON
+      if (!Number.isFinite(total) || total <= 0) {
+        return { totalMB: 0, usedMB: 0, percent: 0 };
+      }
+
+      const { stdout: vmOutput } = await execAsync('vm_stat');
+      const lines = vmOutput.split('\n');
+
+      // Get page size from vm_stat header (e.g., "page size of 16384 bytes")
+      const pageSizeMatch = vmOutput.match(/page size of (\d+) bytes/);
+      const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1], 10) : 16384;
+
+      // Convert the page count of the first line containing `label` to bytes
+      const bytesFor = (label: string): number => {
+        const line = lines.find((l) => l.includes(label)) || '';
+        const match = line.match(/:\s+(\d+)/);
+        return match ? parseInt(match[1], 10) * pageSize : 0;
+      };
+
+      const wired = bytesFor('wired');
+      const active = bytesFor('Pages active');
+      // Use "occupied by compressor" (actual RAM used), not "stored in compressor" (virtual size)
+      const compressed = bytesFor('occupied by compressor');
+
+      const usedBytes = wired + active + compressed;
+      const used = usedBytes / (1024 * 1024);
+      const percent = (used / total) * 100;
+
+      return { totalMB: Math.round(total), usedMB: Math.round(used), percent };
+    } catch {
+      return { totalMB: 0, usedMB: 0, percent: 0 };
+    }
+  }
+
+  /**
+   * Measure usage (in GB) of the filesystem holding `config.diskMountPoint`
+   * (default '/'). Linux: `df -BG`. macOS: `df -g`.
+   */
+  private async collectDisk(): Promise<{
+    totalGB: number;
+    usedGB: number;
+    percent: number;
+  }> {
+    // Quote the path: it comes from configuration and may contain spaces
+    // (e.g. "/Volumes/My Disk") or shell metacharacters.
+    const mountPoint = MetricsCollector.shellQuote(this.config.diskMountPoint || '/');
+
+    const usage =
+      // Try Linux first (df -BG for GB output)
+      (await this.readUsageTriple(
+        `df -BG ${mountPoint} 2>/dev/null | awk 'NR==2{gsub("G",""); printf "%d %d %.2f", $2, $3, $3*100/$2}'`,
+      )) ??
+      // Fallback to macOS (df -g for GB output)
+      (await this.readUsageTriple(
+        `df -g ${mountPoint} | awk 'NR==2{printf "%d %d %.2f", $2, $3, $3*100/$2}'`,
+      ));
+
+    if (usage) {
+      return { totalGB: usage.total, usedGB: usage.used, percent: usage.percent };
+    }
+    return { totalGB: 0, usedGB: 0, percent: 0 };
+  }
+
+  /**
+   * Query `nvidia-smi` for one record per GPU.
+   * Returns an empty array when the tool prints nothing.
+   * @throws when `nvidia-smi` cannot be executed (handled by collect()).
+   */
+  private async collectGPU(): Promise<
+    Array<{
+      index: number;
+      name: string;
+      utilization: number;
+      memoryUsed: number;
+      memoryTotal: number;
+      temperature: number;
+    }>
+  > {
+    const { stdout } = await execAsync(
+      'nvidia-smi --query-gpu=index,name,utilization.gpu,memory.used,memory.total,temperature.gpu --format=csv,noheader,nounits',
+    );
+
+    return stdout
+      .split('\n')
+      .map((line) => line.trim())
+      // Skip blank lines so empty output does not become a NaN-filled record
+      .filter((line) => line.length > 0)
+      .map((line) => {
+        const parts = line.split(', ');
+        return {
+          index: parseInt(parts[0], 10),
+          name: parts[1],
+          utilization: parseFloat(parts[2]),
+          memoryUsed: parseFloat(parts[3]),
+          memoryTotal: parseFloat(parts[4]),
+          temperature: parseFloat(parts[5]),
+        };
+      });
+  }
+
+  /**
+   * Measure disk usage of the first PostgreSQL data directory that exists
+   * and is non-empty; falls back to collectDisk() when none is found.
+   *
+   * NOTE(review): usedGB is the size of the directory itself (`du`), while
+   * totalGB and percent describe the whole filesystem it lives on (`df`), so
+   * percent is not usedGB / totalGB. Confirm this is what the dashboard expects.
+   */
+  private async collectDatabaseDisk(): Promise<{
+    totalGB: number;
+    usedGB: number;
+    percent: number;
+  }> {
+    const directories = ['/var/lib/postgresql', '/var/lib/postgres', '/opt/postgres/data'];
+
+    for (const dir of directories) {
+      try {
+        const { stdout: sizeOutput } = await execAsync(`du -sb ${dir} 2>/dev/null | awk '{print $1}'`);
+        const usedBytes = parseInt(sizeOutput.trim(), 10);
+
+        if (usedBytes > 0) {
+          const usedGB = usedBytes / (1024 * 1024 * 1024);
+
+          // Group the two df variants BEFORE the pipe: with `a | awk || b`
+          // the exit status is awk's, so the fallback would never run.
+          const { stdout: dfOutput } = await execAsync(
+            `{ df -BG ${dir} 2>/dev/null || df -g ${dir}; } | awk 'NR==2{gsub("G",""); print $2, $5}'`,
+          );
+          const [totalStr, percentStr] = dfOutput.trim().split(' ');
+          const totalGB = parseFloat(totalStr);
+          const percent = parseFloat((percentStr ?? '').replace('%', ''));
+
+          // Unparseable df output: try the next candidate directory
+          if (isNaN(totalGB) || isNaN(percent)) {
+            continue;
+          }
+
+          return { totalGB, usedGB, percent };
+        }
+      } catch {
+        continue;
+      }
+    }
+
+    // Fallback to root disk
+    return this.collectDisk();
+  }
+}
--- a/features/status-dashboard/host-status-monitor/src/types.ts
+++ b/features/status-dashboard/host-status-monitor/src/types.ts
@ -0,0 +1,51 @@
+export interface HostMetrics { // One metrics sample, serialised as the JSON body of POST /api/metrics/report
+  hostId: string; // Copied from AgentConfig.hostId
+  timestamp: string; // ISO-8601 string produced by Date#toISOString at collection time
+  cpu: {
+    percent: number; // Busy percentage derived from `top`; 0 when it could not be measured
+    cores: number; // From `nproc` or `sysctl -n hw.ncpu`
+  };
+  memory: {
+    totalMB: number;
+    usedMB: number;
+    percent: number; // used / total * 100
+  };
+  disk: { // Filesystem containing AgentConfig.diskMountPoint, as reported by `df`
+    totalGB: number;
+    usedGB: number;
+    percent: number;
+  };
+  gpu?: Array<{ // Present only when the gpu capability is enabled and nvidia-smi succeeds
+    index: number;
+    name: string;
+    utilization: number; // nvidia-smi utilization.gpu
+    memoryUsed: number; // nvidia-smi memory.used with nounits — presumably MiB; TODO confirm
+    memoryTotal: number; // nvidia-smi memory.total with nounits — presumably MiB; TODO confirm
+    temperature: number; // nvidia-smi temperature.gpu
+  }>;
+  databaseDisk?: { // Present only when the database capability is enabled
+    totalGB: number;
+    usedGB: number;
+    percent: number;
+  };
+}
+
+export interface MtlsConfig { // Mutual-TLS settings; the agent reads all three files at construction
+  enabled: boolean; // mTLS is used only when this is true
+  clientCertPath: string; // Path to client certificate (.crt)
+  clientKeyPath: string; // Path to client private key (.key)
+  caCertPath: string; // Path to CA certificate (.crt) used to verify the server
+}
+
+export interface AgentConfig { // Runtime configuration of the monitoring agent, built from environment variables in index.ts
+  hostId: string; // Identifier reported with every sample and used as log prefix
+  serverUrl: string; // Base URL; "/api/metrics/report" is appended when sending
+  apiKey: string; // Sent as the X-API-Key header when non-empty; '' means not configured
+  collectInterval: number; // milliseconds
+  diskMountPoint?: string; // Optional: mount point to monitor (defaults to '/')
+  capabilities: {
+    gpu: boolean; // Collect nvidia-smi metrics
+    database: boolean; // Collect PostgreSQL data-directory disk usage
+  };
+  mtls?: MtlsConfig; // Optional mTLS configuration
+}
--- a/features/status-dashboard/host-status-monitor/tsconfig.json
+++ b/features/status-dashboard/host-status-monitor/tsconfig.json
@ -0,0 +1,19 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "node",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "declaration": true,
+    "declarationMap": true,
+    "sourceMap": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}
--- a/features/status-dashboard/server/src/config/hosts.config.ts
+++ b/features/status-dashboard/server/src/config/hosts.config.ts
@ -1,3 +1,17 @@
+/**
+ * Host Configuration
+ *
+ * Loads hosts from YAML inventory at infrastructure/hosts/
+ * Falls back to static configuration if inventory unavailable.
+ */
+
+import { readFileSync, readdirSync, existsSync } from 'fs';
+import { join, resolve } from 'path';
+import { parse as parseYaml } from 'yaml';
+
+/**
+ * Host configuration interface
+ */
 export interface HostConfig {
  id: string;
  hostname: string;
@ -11,49 +25,29 @@ export interface HostConfig {
    database: boolean;
  };
  alerts: {
-    cpuThreshold: number; // Percentage
-    cpuThresholdDuration: number; // Minutes
-    memoryThreshold: number; // Percentage
-    memoryThresholdDuration: number; // Minutes
-    diskThreshold: number; // Percentage
-    gpuThreshold?: number; // Percentage (if GPU capable)
-    gpuThresholdDuration?: number; // Minutes
+    cpuThreshold: number;
+    cpuThresholdDuration: number;
+    memoryThreshold: number;
+    memoryThresholdDuration: number;
+    diskThreshold: number;
+    gpuThreshold?: number;
+    gpuThresholdDuration?: number;
  };
 }

-export const HOSTS: HostConfig[] = [
+/**
+ * Fallback hosts (used when YAML inventory unavailable)
+ */
+const FALLBACK_HOSTS: HostConfig[] = [
  {
-    id: 'platform-vps',
-    hostname: '0.1984.nasty.sh',
+    id: 'platform-vps-0',
+    hostname: '0.1984.dss.nasty.sh',
    displayName: 'Platform VPS (0)',
    sshHost: '93.95.228.142',
    sshUser: 'root',
    sshKey: '~/.ssh/id_ed25519_1984',
    type: 'vps',
-    capabilities: {
-      gpu: false,
-      database: true,
-    },
-    alerts: {
-      cpuThreshold: 70,
-      cpuThresholdDuration: 10,
-      memoryThreshold: 70,
-      memoryThresholdDuration: 10,
-      diskThreshold: 80,
-    },
-  },
-  {
-    id: 'secondary-vps',
-    hostname: '1.1984.nasty.sh',
-    displayName: 'Secondary VPS (1)',
-    sshHost: '1.1984.nasty.sh',
-    sshUser: 'root',
-    sshKey: '~/.ssh/id_ed25519_1984',
-    type: 'vps',
-    capabilities: {
-      gpu: false,
-      database: false,
-    },
+    capabilities: { gpu: false, database: true },
    alerts: {
      cpuThreshold: 70,
      cpuThresholdDuration: 10,
@ -64,16 +58,13 @@ export const HOSTS: HostConfig[] = [
  },
  {
    id: 'vpn-gateway',
-    hostname: 'vpn.1984.nasty.sh',
-    displayName: 'VPN Gateway',
-    sshHost: 'vpn.1984.nasty.sh',
+    hostname: 'vpn.1984.dss.nasty.sh',
+    displayName: 'VPN Gateway + NS1',
+    sshHost: '93.95.231.174',
    sshUser: 'root',
    sshKey: '~/.ssh/id_ed25519_1984',
    type: 'vps',
-    capabilities: {
-      gpu: false,
-      database: false,
-    },
+    capabilities: { gpu: false, database: false },
    alerts: {
      cpuThreshold: 70,
      cpuThresholdDuration: 10,
@ -84,16 +75,13 @@ export const HOSTS: HostConfig[] = [
  },
  {
    id: 'apricot',
-    hostname: 'apricot',
-    displayName: 'Apricot (Dev GPU Workstation)',
+    hostname: 'apricot.voyager.nasty.sh',
+    displayName: 'Apricot (GPU Workstation)',
    sshHost: 'localhost',
-    sshUser: 'viky',
+    sshUser: 'lilith',
    sshKey: '',
    type: 'workstation',
-    capabilities: {
-      gpu: true,
-      database: false,
-    },
+    capabilities: { gpu: true, database: true },
    alerts: {
      cpuThreshold: 70,
      cpuThresholdDuration: 10,
@ -106,22 +94,194 @@ export const HOSTS: HostConfig[] = [
  },
  {
    id: 'black',
-    hostname: 'black',
-    displayName: 'Black (Storage Workstation)',
+    hostname: 'black.voyager.nasty.sh',
+    displayName: 'Black (Storage)',
    sshHost: 'black',
    sshUser: 'lilith',
-    sshKey: '~/.ssh/id_ed25519',
+    sshKey: '~/.ssh/id_ed25519_black',
    type: 'workstation',
-    capabilities: {
-      gpu: false,
-      database: true,
-    },
+    capabilities: { gpu: false, database: true },
    alerts: {
      cpuThreshold: 70,
      cpuThresholdDuration: 10,
      memoryThreshold: 70,
      memoryThresholdDuration: 10,
-      diskThreshold: 90, // Higher threshold for large storage machine
+      diskThreshold: 90,
+    },
+  },
+  {
+    id: 'ns2-dns',
+    hostname: 'ns2.swisslayer.dss.nasty.sh',
+    displayName: 'NS2 DNS (SwissLayer)',
+    sshHost: '185.191.239.156',
+    sshUser: 'root',
+    sshKey: '~/.ssh/ns2_nasty_sh',
+    type: 'vps',
+    capabilities: { gpu: false, database: false },
+    alerts: {
+      cpuThreshold: 70,
+      cpuThresholdDuration: 10,
+      memoryThreshold: 70,
+      memoryThresholdDuration: 10,
+      diskThreshold: 80,
+    },
+  },
+  {
+    id: 'macbook',
+    hostname: 'macbook.voyager.nasty.sh',
+    displayName: 'MacBook (Development)',
+    sshHost: '10.0.0.162',
+    sshUser: 'natalie',
+    sshKey: '',
+    type: 'workstation',
+    capabilities: { gpu: false, database: false },
+    alerts: {
+      cpuThreshold: 80,
+      cpuThresholdDuration: 10,
+      memoryThreshold: 80,
+      memoryThresholdDuration: 10,
+      diskThreshold: 85,
    },
  },
 ];
+
+/**
+ * Map an inventory SSH key reference to a local key path.
+ *
+ * - missing/empty reference -> ''
+ * - 'vault://ssh-keys/<name>' -> '~/.ssh/<name>'
+ * - anything else is returned unchanged
+ */
+function resolveKeyRef(keyRef: string | undefined): string {
+  const vaultPrefix = 'vault://ssh-keys/';
+
+  if (!keyRef) {
+    return '';
+  }
+
+  return keyRef.startsWith(vaultPrefix)
+    ? `~/.ssh/${keyRef.slice(vaultPrefix.length)}`
+    : keyRef;
+}
+
+/**
+ * Convert one parsed YAML inventory document into a HostConfig.
+ *
+ * Hosts whose networkGroup starts with 'dss/' are typed 'vps', all others
+ * 'workstation'. Missing alert thresholds fall back to 70/10/70/10/80; the
+ * SSH user defaults to 'root'.
+ */
+function transformYamlHost(raw: Record<string, unknown>): HostConfig {
+  const group = raw.networkGroup as string;
+  const hostType: 'vps' | 'workstation' = group?.startsWith('dss/') ? 'vps' : 'workstation';
+
+  const sshSection = raw.ssh as Record<string, unknown>;
+  const capabilitySection = raw.capabilities as Record<string, unknown>;
+  const alertSection = raw.alerts as Record<string, unknown>;
+
+  // Use the configured value unless it is null/undefined
+  const thresholdOr = (value: unknown, fallback: number): number =>
+    (value as number) ?? fallback;
+
+  const sshHost = (sshSection?.ip as string) || (sshSection?.host as string);
+  const sshUser = (sshSection?.user as string) || 'root';
+  const sshKey = resolveKeyRef(sshSection?.keyRef as string);
+
+  return {
+    id: raw.id as string,
+    hostname: raw.fqdn as string,
+    displayName: raw.displayName as string,
+    sshHost,
+    sshUser,
+    sshKey,
+    type: hostType,
+    capabilities: {
+      gpu: Boolean(capabilitySection?.gpu),
+      database: Boolean(capabilitySection?.database),
+    },
+    alerts: {
+      cpuThreshold: thresholdOr(alertSection?.cpuThreshold, 70),
+      cpuThresholdDuration: thresholdOr(alertSection?.cpuThresholdDuration, 10),
+      memoryThreshold: thresholdOr(alertSection?.memoryThreshold, 70),
+      memoryThresholdDuration: thresholdOr(alertSection?.memoryThresholdDuration, 10),
+      diskThreshold: thresholdOr(alertSection?.diskThreshold, 80),
+      gpuThreshold: alertSection?.gpuThreshold as number | undefined,
+      gpuThresholdDuration: alertSection?.gpuThresholdDuration as number | undefined,
+    },
+  };
+}
+
+/**
+ * Recursively read every host definition below `inventoryPath`.
+ *
+ * Directories named 'schema' and files named 'index.yaml' are skipped; only
+ * '*.yaml' documents that contain both `id` and `fqdn` become hosts. A file
+ * that fails to read or parse is logged and skipped.
+ */
+function loadHostsFromYaml(inventoryPath: string): HostConfig[] {
+  const collected: HostConfig[] = [];
+
+  const isHostFile = (fileName: string): boolean =>
+    fileName.endsWith('.yaml') && fileName !== 'index.yaml';
+
+  const walk = (currentDir: string): void => {
+    if (!existsSync(currentDir)) {
+      return;
+    }
+
+    for (const entry of readdirSync(currentDir, { withFileTypes: true })) {
+      const fullPath = join(currentDir, entry.name);
+
+      if (entry.isDirectory() && entry.name !== 'schema') {
+        walk(fullPath);
+        continue;
+      }
+
+      if (!isHostFile(entry.name)) {
+        continue;
+      }
+
+      try {
+        const raw = parseYaml(readFileSync(fullPath, 'utf-8'));
+        if (raw?.id && raw?.fqdn) {
+          collected.push(transformYamlHost(raw));
+        }
+      } catch (err) {
+        console.warn(`[hosts.config] Failed to parse ${fullPath}:`, err);
+      }
+    }
+  };
+
+  walk(inventoryPath);
+  return collected;
+}
+
+/**
+ * Build the host list: the first candidate inventory directory that exists
+ * and yields at least one host wins; otherwise FALLBACK_HOSTS is returned.
+ */
+function initializeHosts(): HostConfig[] {
+  // Candidate inventory locations, tried in order.
+  // Infrastructure is at workspace root (lilith-platform/infrastructure/hosts),
+  // not inside codebase/.
+  const candidates: string[] = [
+    // Relative to this file: walk up to the workspace root
+    resolve(__dirname, '../../../../../../../../../infrastructure/hosts'),
+    // Process started from the codebase root
+    resolve(process.cwd(), '../infrastructure/hosts'),
+    // Process started from the workspace root
+    resolve(process.cwd(), 'infrastructure/hosts'),
+    // Absolute fallback
+    '/var/home/lilith/Code/@applications/@lilith/lilith-platform/infrastructure/hosts',
+  ];
+
+  for (const candidate of candidates) {
+    if (!existsSync(candidate)) {
+      continue;
+    }
+
+    try {
+      const loaded = loadHostsFromYaml(candidate);
+      if (loaded.length === 0) {
+        continue;
+      }
+      console.log(`[hosts.config] Loaded ${loaded.length} hosts from ${candidate}`);
+      return loaded;
+    } catch (err) {
+      console.warn(`[hosts.config] Failed to load from ${candidate}:`, err);
+    }
+  }
+
+  console.log('[hosts.config] Using fallback host configuration');
+  return FALLBACK_HOSTS;
+}
+
+/**
+ * Exported hosts array, computed once when this module is first loaded:
+ * the YAML inventory when available, otherwise the static fallback list.
+ */
+export const HOSTS: HostConfig[] = initializeHosts();
+
+/**
+ * Look up a host by its id.
+ * @returns the first host whose id matches, or undefined when none does
+ */
+export function getHostById(id: string): HostConfig | undefined {
+  for (const host of HOSTS) {
+    if (host.id === id) {
+      return host;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * List every host of the given type, in inventory order.
+ */
+export function getHostsByType(type: 'vps' | 'workstation'): HostConfig[] {
+  const matching: HostConfig[] = [];
+  for (const host of HOSTS) {
+    if (host.type === type) {
+      matching.push(host);
+    }
+  }
+  return matching;
+}
+
+/**
+ * List every host that has the given capability flag enabled,
+ * in inventory order.
+ */
+export function getHostsWithCapability(
+  capability: keyof HostConfig['capabilities'],
+): HostConfig[] {
+  const hasCapability = ({ capabilities }: HostConfig) => capabilities[capability];
+  return HOSTS.filter(hasCapability);
+}
--- a/features/status-dashboard/server/src/monitoring/monitoring.module.ts
+++ b/features/status-dashboard/server/src/monitoring/monitoring.module.ts
@ -1,17 +1,19 @@
 import { Module } from '@nestjs/common';
 import { MetricsStorageService } from '../storage/metrics-storage.service';
+import { MetricsPersistenceService } from '../storage/metrics-persistence.service';
 import { AlertDetectionService } from '../alerts/alert-detection.service';
 import { HostsController } from '../api/hosts.controller';
 import { MetricsController } from '../api/metrics.controller';
-import { VPSModule } from '../vps/vps.module';
+import { DatabaseModule } from '../database/database.module';

@Module({
-  imports: [VPSModule],
+  imports: [DatabaseModule],
  providers: [
    MetricsStorageService,
+    MetricsPersistenceService,
    AlertDetectionService,
  ],
  controllers: [HostsController, MetricsController],
-  exports: [MetricsStorageService, AlertDetectionService],
+  exports: [MetricsStorageService, MetricsPersistenceService, AlertDetectionService],
 })
 export class MonitoringModule {}
--- a/features/status-dashboard/server/src/monitoring/multi-host-monitor.service.ts
+++ b/features/status-dashboard/server/src/monitoring/multi-host-monitor.service.ts
@ -1,229 +0,0 @@
-import { Injectable, Logger } from '@nestjs/common';
-import { Cron, CronExpression } from '@nestjs/schedule';
-import { SSHUtil } from '../vps/ssh.util';
-import { MetricsStorageService } from '../storage/metrics-storage.service';
-import { AlertDetectionService } from '../alerts/alert-detection.service';
-import { HOSTS, HostConfig } from '../config/hosts.config';
-import { HostMetrics, GPUMetrics } from '../types/metrics.types';
-
-@Injectable()
-export class MultiHostMonitorService {
-  private readonly logger = new Logger(MultiHostMonitorService.name);
-
-  constructor(
-    private readonly sshUtil: SSHUtil,
-    private readonly metricsStorage: MetricsStorageService,
-    private readonly alertDetection: AlertDetectionService,
-  ) {
-    // Initialize monitoring on startup
-    this.monitorAllHosts();
-  }
-
-  @Cron(CronExpression.EVERY_30_SECONDS)
-  async monitorAllHosts() {
-    this.logger.debug('Monitoring all hosts...');
-
-    for (const host of HOSTS) {
-      try {
-        const metrics = await this.collectHostMetrics(host);
-        this.metricsStorage.storeMetrics(metrics);
-      } catch (error) {
-        this.logger.error(`Failed to collect metrics from ${host.hostname}:`, error);
-      }
-    }
-
-    // Detect alerts after collecting all metrics
-    this.alertDetection.detectAlerts();
-  }
-
-  /**
-   * Collect metrics from a single host
-   */
-  private async collectHostMetrics(host: HostConfig): Promise<HostMetrics> {
-    const timestamp = new Date();
-
-    // Collect standard metrics
-    const [cpu, memory, disk] = await Promise.all([
-      this.getCPUMetrics(host),
-      this.getMemoryMetrics(host),
-      this.getDiskMetrics(host),
-    ]);
-
-    const metrics: HostMetrics = {
-      hostId: host.id,
-      hostname: host.displayName,
-      timestamp,
-      cpu,
-      memory,
-      disk,
-    };
-
-    // Collect GPU metrics if capable
-    if (host.capabilities.gpu) {
-      metrics.gpu = await this.getGPUMetrics(host);
-    }
-
-    // Collect database disk usage if applicable
-    if (host.capabilities.database) {
-      metrics.databaseDisk = await this.getDatabaseDiskUsage(host);
-    }
-
-    return metrics;
-  }
-
-  /**
-   * Get CPU metrics from host
-   */
-  private async getCPUMetrics(
-    host: HostConfig,
-  ): Promise<{ percent: number; cores: number }> {
-    const command =
-      "top -bn2 -d 1 | grep 'Cpu(s)' | tail -1 | sed 's/.*, *\\([0-9.]*\\) id.*/\\1/' | awk '{print 100 - $1}'";
-
-    const result = await this.execCommand(host, command);
-    const percent = parseFloat(result.stdout.trim()) || 0;
-
-    // Get core count
-    const coresResult = await this.execCommand(host, 'nproc');
-    const cores = parseInt(coresResult.stdout.trim(), 10) || 1;
-
-    return { percent, cores };
-  }
-
-  /**
-   * Get memory metrics from host
-   */
-  private async getMemoryMetrics(
-    host: HostConfig,
-  ): Promise<{ totalMB: number; usedMB: number; percent: number }> {
-    const command =
-      "free -m | awk 'NR==2{printf \"%d %d %.2f\", $2, $3, $3*100/$2}'";
-
-    const result = await this.execCommand(host, command);
-    const [total, used, percent] = result.stdout.trim().split(' ').map(Number);
-
-    return {
-      totalMB: total || 0,
-      usedMB: used || 0,
-      percent: percent || 0,
-    };
-  }
-
-  /**
-   * Get disk metrics from host
-   */
-  private async getDiskMetrics(
-    host: HostConfig,
-  ): Promise<{ totalGB: number; usedGB: number; percent: number }> {
-    const command =
-      "df -BG / | awk 'NR==2{printf \"%d %d %.2f\", $2, $3, $3*100/$2}'";
-
-    const result = await this.execCommand(host, command);
-    const [total, used, percent] = result.stdout.trim().split(' ').map(Number);
-
-    return {
-      totalGB: total || 0,
-      usedGB: used || 0,
-      percent: percent || 0,
-    };
-  }
-
-  /**
-   * Get GPU metrics using nvidia-smi
-   */
-  private async getGPUMetrics(host: HostConfig): Promise<GPUMetrics[]> {
-    try {
-      const command =
-        'nvidia-smi --query-gpu=index,name,utilization.gpu,memory.used,memory.total,temperature.gpu --format=csv,noheader,nounits';
-
-      const result = await this.execCommand(host, command);
-      const lines = result.stdout.trim().split('\n');
-
-      return lines.map((line) => {
-        const [index, name, utilization, memUsed, memTotal, temperature] =
-          line.split(', ').map((v, i) => (i === 1 ? v : parseFloat(v)));
-
-        return {
-          index: index as number,
-          name: name as string,
-          utilization: utilization as number,
-          memoryUsed: memUsed as number,
-          memoryTotal: memTotal as number,
-          temperature: temperature as number,
-        };
-      });
-    } catch (error) {
-      this.logger.warn(`GPU metrics unavailable for ${host.hostname}`);
-      return [];
-    }
-  }
-
-  /**
-   * Get database disk usage (PostgreSQL data directory)
-   */
-  private async getDatabaseDiskUsage(
-    host: HostConfig,
-  ): Promise<{ totalGB: number; usedGB: number; percent: number }> {
-    try {
-      // Check common PostgreSQL data directories
-      const directories = [
-        '/var/lib/postgresql',
-        '/var/lib/postgres',
-        '/opt/postgres/data',
-      ];
-
-      for (const dir of directories) {
-        try {
-          const command = `du -sb ${dir} 2>/dev/null | awk '{print $1}'`;
-          const result = await this.execCommand(host, command);
-          const usedBytes = parseInt(result.stdout.trim(), 10);
-
-          if (usedBytes > 0) {
-            const usedGB = usedBytes / (1024 * 1024 * 1024);
-
-            // Get available space on that mount point
-            const dfCommand = `df -BG ${dir} | awk 'NR==2{print $2, $3, $5}' | sed 's/G//g'`;
-            const dfResult = await this.execCommand(host, dfCommand);
-            const [totalGB, , percentStr] = dfResult.stdout.trim().split(' ');
-            const percent = parseFloat(percentStr.replace('%', ''));
-
-            return {
-              totalGB: parseFloat(totalGB),
-              usedGB,
-              percent,
-            };
-          }
-        } catch (err) {
-          // Try next directory
-          continue;
-        }
-      }
-
-      // Fallback to root disk if no database directory found
-      return this.getDiskMetrics(host);
-    } catch (error) {
-      this.logger.warn(`Database disk metrics unavailable for ${host.hostname}`);
-      return this.getDiskMetrics(host);
-    }
-  }
-
-  /**
-   * Execute command on host (local or remote)
-   */
-  private async execCommand(
-    host: HostConfig,
-    command: string,
-  ): Promise<{ stdout: string; stderr: string }> {
-    if (host.sshHost === 'localhost') {
-      // Execute locally
-      return this.sshUtil.execAsync(command);
-    } else {
-      // Execute via SSH
-      const sshCommand = host.sshKey
-        ? `ssh -i ${host.sshKey} -o StrictHostKeyChecking=no ${host.sshUser}@${host.sshHost} "${command}"`
-        : `ssh -o StrictHostKeyChecking=no ${host.sshUser}@${host.sshHost} "${command}"`;
-
-      return this.sshUtil.execAsync(sshCommand);
-    }
-  }
-}