#!/bin/bash
# Health check for host-status-monitor service
# Cross-platform: Linux (systemd) and macOS (launchd)

# Names used to address the monitored service, plus health-check tuning.
SERVICE="host-status-monitor"                 # unit name passed to systemctl/journalctl
PLIST_LABEL="com.lilith.host-status-monitor"  # job label passed to launchctl
LOG_TAG="host-status-monitor-healthcheck"     # tag passed to logger -t
MAX_FAILURES=3                                # "Error:" line count at which a restart is triggered

# Platform switch. IS_MACOS holds the literal word true or false so the
# functions below can execute it directly as a command (`if $IS_MACOS; then`).
case "$(uname)" in
    Darwin) IS_MACOS=true ;;
    *)      IS_MACOS=false ;;
esac

# Send one message to the system log under the health-check tag.
# Globals:   IS_MACOS, LOG_TAG (read)
# Arguments: $1 - message text
# Returns:   exit status of the logger invocation
log_message() {
    local logger_cmd="logger"

    # On macOS the logger binary is addressed by absolute path.
    if $IS_MACOS; then
        logger_cmd="/usr/bin/logger"
    fi

    "$logger_cmd" -t "$LOG_TAG" "$1"
}

# Check if service is running
# Globals:  IS_MACOS, PLIST_LABEL, SERVICE (read)
# Returns:  0 when the service is found (macOS: a launchctl list entry whose
#           label equals PLIST_LABEL; Linux: systemctl is-active succeeds),
#           non-zero otherwise.
is_service_running() {
    if $IS_MACOS; then
        # Compare the label column for exact equality. A plain
        # `grep "$PLIST_LABEL"` treats the dots as regex wildcards and also
        # matches any job whose label merely contains this one (for example a
        # "...-healthcheck" sibling), which would make a missing service look
        # present and suppress the restart.
        # NOTE(review): a matching row only shows the job is known to launchd;
        # the PID column is not inspected here - confirm that is sufficient.
        launchctl list \
            | awk -v label="$PLIST_LABEL" '$3 == label { found = 1 } END { exit !found }'
    else
        systemctl is-active --quiet "$SERVICE"
    fi
}

# Start the service, or restart it if it is already up.
# Globals:  IS_MACOS, PLIST_LABEL, SERVICE (read)
# Returns:  Linux: status of `systemctl restart`.
#           macOS: 0 if kickstart succeeds, otherwise the status of the
#           fallback `launchctl load`.
restart_service() {
    local plist_path="/Library/LaunchDaemons/${PLIST_LABEL}.plist"

    if ! $IS_MACOS; then
        systemctl restart "$SERVICE"
        return
    fi

    # First choice: kickstart the job in the system domain (errors silenced).
    if launchctl kickstart -k "system/$PLIST_LABEL" 2>/dev/null; then
        return 0
    fi

    # Fallback when kickstart reports failure: unload (errors ignored and
    # silenced), then load the daemon plist again.
    launchctl unload "$plist_path" 2>/dev/null
    launchctl load "$plist_path"
}

# Print recent log output of the monitored service on stdout.
# Globals:  IS_MACOS, SERVICE (read)
# Outputs:  Linux: journal entries for the unit from the last 2 minutes.
#           macOS: the last 50 lines of the service log file when it exists;
#           otherwise lines from the last 2 minutes of `log show` for process
#           'node' that mention "host-status-monitor" (case-insensitive).
# Returns:  status of journalctl / tail; always 0 on the `log show` path.
get_recent_logs() {
    local logfile=/var/log/host-status-monitor.log

    if ! $IS_MACOS; then
        journalctl -u "$SERVICE" --since "2 minutes ago" -q --no-pager 2>/dev/null
        return
    fi

    if [[ ! -f "$logfile" ]]; then
        # No log file: fall back to the system log. `|| true` keeps the
        # function successful when grep finds no matching line.
        log show --predicate "process == 'node'" --last 2m 2>/dev/null \
            | grep -i "host-status-monitor" || true
        return
    fi

    tail -50 "$logfile" 2>/dev/null
}

# --- Health-check flow ---

# Step 1: a service that is not running is (re)started straight away; the
# script then exits with whatever status the restart reported.
is_service_running || {
    log_message "Service not running, starting"
    restart_service
    exit $?
}

# Step 2: pull recent log output and tally success / failure markers.
# grep -c exits non-zero when the count is 0; `|| true` keeps the printed "0".
RECENT_LOGS=$(get_recent_logs)
SUCCESS_COUNT=$(printf '%s\n' "$RECENT_LOGS" | grep -c "Metrics sent successfully" || true)
FAILURE_COUNT=$(printf '%s\n' "$RECENT_LOGS" | grep -c "Error:" || true)

# Step 3: any recent success means the service is healthy.
if (( SUCCESS_COUNT > 0 )); then
    exit 0
fi

# Step 4: no successes and at least MAX_FAILURES errors -> restart, give the
# service a few seconds, then report whether it came back.
if (( FAILURE_COUNT >= MAX_FAILURES )); then
    log_message "Too many failures ($FAILURE_COUNT), no successes - restarting"
    restart_service
    sleep 5

    if ! is_service_running; then
        log_message "Service failed to restart"
        exit 1
    fi

    log_message "Service restarted successfully"
    exit 0
fi

# Step 5: running but quiet (possibly still starting up) - treat as healthy.
exit 0
