diff --git a/features/status-dashboard/host-status-monitor/com.lilith.host-status-monitor-healthcheck.plist b/features/status-dashboard/host-status-monitor/com.lilith.host-status-monitor-healthcheck.plist
new file mode 100644
index 000000000..fab641b0e
--- /dev/null
+++ b/features/status-dashboard/host-status-monitor/com.lilith.host-status-monitor-healthcheck.plist
@@ -0,0 +1,26 @@
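+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.lilith.host-status-monitor-healthcheck</string>
+
+    <key>ProgramArguments</key>
+    <array>
+        <string>/bin/bash</string>
+        <string>/opt/host-status-monitor/healthcheck</string>
+    </array>
+
+    <key>StartInterval</key>
+    <integer>120</integer>
+
+    <key>RunAtLoad</key>
+    <true/> <!-- NOTE(review): value was lost in extraction; true assumed - confirm -->
+
+    <key>StandardOutPath</key>
+    <string>/var/log/host-status-monitor-healthcheck.log</string>
+
+    <key>StandardErrorPath</key>
+    <string>/var/log/host-status-monitor-healthcheck.log</string>
+</dict>
+</plist>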
diff --git a/features/status-dashboard/host-status-monitor/deploy.sh b/features/status-dashboard/host-status-monitor/deploy.sh
index 2542f271a..6c52202c7 100755
--- a/features/status-dashboard/host-status-monitor/deploy.sh
+++ b/features/status-dashboard/host-status-monitor/deploy.sh
@@ -231,13 +231,55 @@ WRAPPER
run_remote "$host" "sudo systemctl daemon-reload && sudo systemctl enable host-status-monitor && sudo systemctl restart host-status-monitor"
fi
- echo "7. Checking status..."
+ echo "7. Installing health check..."
+ if is_macos_host "$host"; then
+ # macOS: use launchd
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck" "$target:/tmp/healthcheck"
+ scp "$SCRIPT_DIR/com.lilith.host-status-monitor-healthcheck.plist" "$target:/tmp/"
+ run_remote "$host" "mv /tmp/healthcheck $INSTALL_DIR/healthcheck && chmod +x $INSTALL_DIR/healthcheck"
+ run_remote "$host" "launchctl unload /Library/LaunchDaemons/com.lilith.host-status-monitor-healthcheck.plist 2>/dev/null || true"
+ run_remote "$host" "mv /tmp/com.lilith.host-status-monitor-healthcheck.plist /Library/LaunchDaemons/"
+ run_remote "$host" "launchctl load /Library/LaunchDaemons/com.lilith.host-status-monitor-healthcheck.plist"
+ elif [ "$host" = "apricot" ]; then
+ # Local Linux host
+ sudo cp "$SCRIPT_DIR/host-status-monitor-healthcheck" "$INSTALL_DIR/healthcheck"
+ sudo chmod +x "$INSTALL_DIR/healthcheck"
+ sudo cp "$SCRIPT_DIR/host-status-monitor-healthcheck.service" /etc/systemd/system/
+ sudo cp "$SCRIPT_DIR/host-status-monitor-healthcheck.timer" /etc/systemd/system/
+ sudo systemctl daemon-reload
+ sudo systemctl enable --now host-status-monitor-healthcheck.timer
+ elif uses_ssh_key "$host"; then
+ # Remote Linux with SSH key
+ scp -i "$SSH_KEY" $SSH_OPTS "$SCRIPT_DIR/host-status-monitor-healthcheck" "$target:$INSTALL_DIR/healthcheck"
+ run_remote "$host" "chmod +x $INSTALL_DIR/healthcheck"
+ scp -i "$SSH_KEY" $SSH_OPTS "$SCRIPT_DIR/host-status-monitor-healthcheck.service" "$target:/etc/systemd/system/"
+ scp -i "$SSH_KEY" $SSH_OPTS "$SCRIPT_DIR/host-status-monitor-healthcheck.timer" "$target:/etc/systemd/system/"
+ run_remote "$host" "systemctl daemon-reload && systemctl enable --now host-status-monitor-healthcheck.timer"
+ elif needs_sudo "$host"; then
+ # Remote Linux with sudo
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck" "$target:/tmp/healthcheck"
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck.service" "$target:/tmp/host-status-monitor-healthcheck.service"
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck.timer" "$target:/tmp/host-status-monitor-healthcheck.timer"
+ run_remote "$host" "mv /tmp/healthcheck $INSTALL_DIR/healthcheck && chmod +x $INSTALL_DIR/healthcheck"
+ run_remote "$host" "mv /tmp/host-status-monitor-healthcheck.service /tmp/host-status-monitor-healthcheck.timer /etc/systemd/system/"
+ run_remote "$host" "systemctl daemon-reload && systemctl enable --now host-status-monitor-healthcheck.timer"
+ else
+ # Remote Linux without sudo
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck" "$target:$INSTALL_DIR/healthcheck"
+ run_remote "$host" "chmod +x $INSTALL_DIR/healthcheck"
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck.service" "$target:/etc/systemd/system/"
+ scp "$SCRIPT_DIR/host-status-monitor-healthcheck.timer" "$target:/etc/systemd/system/"
+ run_remote "$host" "sudo systemctl daemon-reload && sudo systemctl enable --now host-status-monitor-healthcheck.timer"
+ fi
+
+ echo "8. Checking status..."
sleep 2
if is_macos_host "$host"; then
run_remote "$host" "sudo launchctl list | grep host-status-monitor" || true
run_remote "$host" "tail -5 /var/log/host-status-monitor.log 2>/dev/null" || true
else
run_remote "$host" "systemctl status host-status-monitor --no-pager" || true
+ run_remote "$host" "systemctl list-timers host-status-monitor-healthcheck.timer --no-pager" || true
fi
echo ""
diff --git a/features/status-dashboard/host-status-monitor/deploy/plum.env b/features/status-dashboard/host-status-monitor/deploy/plum.env
index b5a3b4670..26ee0b5aa 100644
--- a/features/status-dashboard/host-status-monitor/deploy/plum.env
+++ b/features/status-dashboard/host-status-monitor/deploy/plum.env
@@ -20,5 +20,5 @@ MTLS_CA_CERT=/etc/host-status-monitor/certs/ca.crt
# Option 2: API Key (fallback)
# API_KEY=
-# VPN Proxy (route through VPN gateway for controlled egress)
-VPN_PROXY_URL=socks5://10.8.0.1:1080
+# VPN Proxy - disabled for plum (no WireGuard installed)
+# VPN_PROXY_URL=socks5://10.8.0.1:1080
diff --git a/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck b/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck
new file mode 100644
index 000000000..0f4d7ac28
--- /dev/null
+++ b/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck
@@ -0,0 +1,95 @@
+#!/bin/bash
+# Health check for host-status-monitor service
+# Cross-platform: Linux (systemd) and macOS (launchd)
+
+SERVICE="host-status-monitor"                  # unit name passed to systemctl / journalctl
+PLIST_LABEL="com.lilith.host-status-monitor"   # job label passed to launchctl
+LOG_TAG="host-status-monitor-healthcheck"      # tag passed to logger -t
+MAX_FAILURES=3                                 # "Error:" line count that triggers a restart
+
+# Detect the platform once. IS_MACOS holds the command name "true" or "false",
+# so later code can execute it directly as an if-condition.
+case "$(uname)" in
+  Darwin) IS_MACOS=true ;;
+  *) IS_MACOS=false ;;
+esac
+
+log_message() {
+  # Write message $1 to syslog tagged with $LOG_TAG. macOS calls logger by its
+  # absolute path; other systems resolve it through PATH.
+  local logger_bin="logger"
+  if $IS_MACOS; then logger_bin="/usr/bin/logger"; fi
+  "$logger_bin" -t "$LOG_TAG" "$1"
+}
+
+# Service up? macOS compares the label column exactly so "<label>-healthcheck" cannot match.
+is_service_running() {
+  if $IS_MACOS; then
+    launchctl list | awk -v label="$PLIST_LABEL" '$3 == label { hit = 1 } END { exit !hit }'
+  else
+    systemctl is-active --quiet "$SERVICE"
+  fi
+}
+
+# Restart the service: systemd on Linux; launchd kickstart with an unload/load fallback on macOS.
+restart_service() {
+  local plist_path="/Library/LaunchDaemons/${PLIST_LABEL}.plist"
+  if ! $IS_MACOS; then
+    systemctl restart "$SERVICE"
+  elif ! launchctl kickstart -k "system/$PLIST_LABEL" 2>/dev/null; then
+    launchctl unload "$plist_path" 2>/dev/null
+    launchctl load "$plist_path"
+  fi
+}
+
+# Print the service's recent log output on stdout for the health evaluation.
+get_recent_logs() {
+  local log_file="/var/log/host-status-monitor.log"
+  if ! $IS_MACOS; then
+    # Linux: journal entries for the unit from the last two minutes.
+    journalctl -u "$SERVICE" --since "2 minutes ago" -q --no-pager 2>/dev/null
+  elif [[ -f "$log_file" ]]; then
+    # macOS: last 50 lines of the service log file when it exists.
+    tail -50 "$log_file" 2>/dev/null
+  else
+    # macOS fallback: unified-log entries from node processes that mention the service.
+    log show --predicate "process == 'node'" --last 2m 2>/dev/null | grep -i "host-status-monitor" || true
+  fi
+}
+
+# Step 1: a service that is not running is started right away, and the result of
+# that start attempt becomes this script's exit status.
+is_service_running || {
+  log_message "Service not running, starting"
+  restart_service
+  exit $?
+}
+
+# Step 2: collect recent output and tally the success / failure marker strings.
+RECENT_LOGS=$(get_recent_logs)
+
+# grep -c prints 0 but exits 1 when nothing matches; "|| true" absorbs that status.
+SUCCESS_COUNT=$(echo "$RECENT_LOGS" | grep -c "Metrics sent successfully" || true)
+FAILURE_COUNT=$(echo "$RECENT_LOGS" | grep -c "Error:" || true)
+
+# Step 3: any recent success means the service is healthy.
+if [[ "$SUCCESS_COUNT" -gt 0 ]]; then
+  exit 0
+fi
+
+# Step 4: below the failure threshold with no successes, the running service is
+# given the benefit of the doubt - it might still be starting up.
+[[ "$FAILURE_COUNT" -ge "$MAX_FAILURES" ]] || exit 0
+
+# Step 5: too many failures and no successes - restart, wait, then verify.
+log_message "Too many failures ($FAILURE_COUNT), no successes - restarting"
+restart_service
+sleep 5
+
+if ! is_service_running; then
+  log_message "Service failed to restart"
+  exit 1
+fi
+
+log_message "Service restarted successfully"
+exit 0
diff --git a/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck.service b/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck.service
new file mode 100644
index 000000000..c64e71ea9
--- /dev/null
+++ b/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck.service
@@ -0,0 +1,7 @@
+[Unit]
+Description=Host Status Monitor Health Check
+After=host-status-monitor.service
+
+[Service]
+Type=oneshot
+ExecStart=/opt/host-status-monitor/healthcheck
diff --git a/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck.timer b/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck.timer
new file mode 100644
index 000000000..c02d857e2
--- /dev/null
+++ b/features/status-dashboard/host-status-monitor/host-status-monitor-healthcheck.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Run host-status-monitor health check every 2 minutes
+
+[Timer]
+OnBootSec=180
+OnUnitActiveSec=120
+AccuracySec=30
+
+[Install]
+WantedBy=timers.target