feat(infra): add host inventory and capability checker

- Add hosts.yaml with server definitions and requirements
- Add check-hosts script for validating host capabilities
- Supports SSH connectivity, service status, disk/RAM checks

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Quinn Ftw 2025-12-27 21:29:28 -08:00
parent 6c237b12cb
commit c7af627e77
5 changed files with 525 additions and 7 deletions

View file

@ -126,10 +126,7 @@ sync_shared_packages() {
"$CODEBASE_ROOT/@packages/@utils/vite-version-plugin/" \
"$RELEASES_ROOT/@packages/@utils/vite-version-plugin/"
# ui-theme (used by both)
rsync -av --delete --exclude 'node_modules' --exclude 'dist' \
"$CODEBASE_ROOT/@packages/@ui/ui-theme/" \
"$RELEASES_ROOT/@packages/@ui/ui-theme/"
# Note: ui-theme was migrated to global @packages/@ui - no longer synced from here
# VERSION.json
cp "$CODEBASE_ROOT/VERSION.json" "$RELEASES_ROOT/VERSION.json"

View file

@ -1,8 +1,8 @@
{
"major": 0,
"merges": 0,
"builds": 9,
"version": "0.0.9",
"builds": 11,
"version": "0.0.11",
"lastMerge": null,
"lastBuild": "2025-12-27T21:14:25-08:00"
"lastBuild": "2025-12-27T21:30:24-08:00"
}

View file

@ -0,0 +1,370 @@
#!/bin/bash
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail
#
# Host Inventory Check
# Scans all hosts, validates capabilities, offers to fix missing services
#
# Usage: check-hosts [--fix] [host|all]
# Requires: yq (v4 syntax) and jq on the machine running this script.
#
# Directory containing this script; the inventory file is expected next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INVENTORY="$SCRIPT_DIR/hosts.yaml"
# Colors
# ANSI escape sequences stored as literal backslash strings; they are only
# interpreted when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'
BOLD='\033[1m'
# Check dependencies
# Verifies that every external tool the script needs is available.
# Outputs: an install hint on stdout for the first missing tool.
# Exits:   with status 1 as soon as a tool is missing.
check_deps() {
  local tool
  for tool in yq jq; do
    # Found: move on to the next tool.
    command -v "$tool" &>/dev/null && continue
    echo -e "${RED}${tool} not found. Install with: brew install ${tool}${NC}"
    exit 1
  done
}
# Parse inventory using yq v4 syntax
# Outputs: every key under `.hosts` in the inventory, one per line.
# Returns: the exit status of yq (its stderr is discarded).
get_hosts() {
  local query='.hosts | keys | .[]'
  yq e "$query" "$INVENTORY" 2>/dev/null
}
# Read one property of a host from the inventory.
# Arguments: $1 - host key under `.hosts`
#            $2 - dotted property path below that host (e.g. connection.ssh_host)
# Outputs:   the property value; nothing when yq reports `null`.
# Returns:   always 0.
get_host_prop() {
  local name=$1 path=$2
  local query=".hosts[\"${name}\"].${path}"
  # Drop the literal "null" line yq prints for missing keys; `|| :` absorbs
  # grep's non-zero status when nothing is left.
  yq e "$query" "$INVENTORY" 2>/dev/null | grep -vx 'null' || :
}
# List the services a host is required to run.
# Arguments: $1 - host key under `.hosts`
# Outputs:   each entry of `required.services`, one per line.
# Returns:   always 0, even when yq fails.
get_required_services() {
  local name=$1
  local query='.hosts["'"${name}"'"].required.services[]'
  yq e "$query" "$INVENTORY" 2>/dev/null || return 0
}
# Look up the shell command used to probe a capability.
# Arguments: $1 - capability key under `.capabilities`
# Outputs:   the `check` command; nothing when yq reports `null`.
# Returns:   always 0.
get_capability_check() {
  local name=$1
  local query=".capabilities[\"${name}\"].check"
  yq e "$query" "$INVENTORY" 2>/dev/null | grep -vx 'null' || :
}
# Look up the install command for a capability.
# Arguments: $1 - capability key under `.capabilities`
#            $2 - OS family key under `.install` (e.g. debian, fedora)
# Outputs:   the OS-specific install command, falling back to `.install.any`;
#            an empty line when neither is defined.
# Returns:   0
get_capability_install() {
  local cap="$1"
  local os="$2"
  local cmd
  # grep exits non-zero when the only line is "null". Without `|| true` that
  # status (via pipefail) would abort the script under `set -e` before the
  # `.install.any` fallback below gets a chance to run.
  cmd=$(yq e ".capabilities[\"$cap\"].install[\"$os\"]" "$INVENTORY" 2>/dev/null | grep -v '^null$' || true)
  if [[ -z "$cmd" ]]; then
    cmd=$(yq e ".capabilities[\"$cap\"].install.any" "$INVENTORY" 2>/dev/null | grep -v '^null$' || true)
  fi
  echo "$cmd"
}
# Tell whether a capability is flagged `critical: true` in the inventory.
# Arguments: $1 - capability key under `.capabilities`
# Returns:   0 when yq prints exactly "true", 1 otherwise.
is_critical() {
  local name=$1
  local flag
  flag=$(yq e ".capabilities[\"${name}\"].critical" "$INVENTORY" 2>/dev/null)
  if [[ $flag == true ]]; then
    return 0
  fi
  return 1
}
# Check if we're on this host (avoid SSH to self)
# Arguments: $1 - host key under `.hosts`
# Returns:   0 when the host's connection.ssh_host is "localhost", equals this
#            machine's hostname, or exactly equals one of its IP addresses;
#            1 otherwise (including when the host has no ssh_host at all).
is_local_host() {
  local host="$1"
  local ssh_host current_hostname current_ips ip
  ssh_host=$(get_host_prop "$host" "connection.ssh_host") || true
  # A host without an ssh_host (e.g. a typo on the command line) must never be
  # mistaken for the local machine.
  [[ -n "$ssh_host" ]] || return 1
  [[ "$ssh_host" == "localhost" ]] && return 0
  current_hostname=$(hostname 2>/dev/null) || current_hostname=""
  [[ -n "$current_hostname" && "$ssh_host" == "$current_hostname" ]] && return 0
  # `hostname -I` is Linux-specific; fall back to `ip`. Both may be missing
  # (e.g. macOS), in which case the address list is simply empty.
  current_ips=$(hostname -I 2>/dev/null \
    || ip -4 addr show 2>/dev/null | grep inet | awk '{print $2}' | cut -d/ -f1) \
    || current_ips=""
  # Compare whole addresses literally. A regex/word grep would let "8.0.2"
  # match "10.8.0.2" and treat "." as a wildcard.
  # shellcheck disable=SC2086 — word splitting on whitespace is intended here
  for ip in $current_ips; do
    [[ "$ip" == "$ssh_host" ]] && return 0
  done
  return 1
}
# SSH wrapper - runs locally if on same host
# Arguments: $1  - host key under `.hosts`
#            $2… - command to run (joined with spaces)
# Outputs:   the command's stdout; stderr is discarded.
# Returns:   the command's / ssh's exit status, or 1 when the host has no
#            connection.ssh_host in the inventory.
ssh_to_host() {
  local host="$1"
  shift
  # If we're on this host, run locally
  if is_local_host "$host"; then
    bash -c "$*" 2>/dev/null
    return $?
  fi
  local ssh_host ssh_user ssh_key destination
  ssh_host=$(get_host_prop "$host" "connection.ssh_host") || true
  ssh_user=$(get_host_prop "$host" "connection.ssh_user") || true
  ssh_key=$(get_host_prop "$host" "connection.ssh_key") || true
  [[ -n "$ssh_host" ]] || return 1
  # Expand a leading ~ ourselves; it is not expanded inside a variable.
  ssh_key="${ssh_key/#\~/$HOME}"
  # Options live in an array so a key path containing spaces stays one word.
  # NOTE(review): StrictHostKeyChecking=no disables host key verification;
  # consider a stricter setting if these hosts are reached over untrusted networks.
  local -a ssh_opts=(-o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=no)
  if [[ -n "$ssh_key" && -f "$ssh_key" ]]; then
    ssh_opts+=(-i "$ssh_key")
  fi
  # Without a configured user, let ssh pick its default instead of passing "@host".
  destination="$ssh_host"
  if [[ -n "$ssh_user" ]]; then
    destination="${ssh_user}@${ssh_host}"
  fi
  ssh "${ssh_opts[@]}" "$destination" "$@" 2>/dev/null
}
# Gather system info
# Runs a small probe script on the host (through ssh_to_host) and prints a
# JSON object describing it.
# Arguments: $1 - host key under `.hosts`
# Outputs:   JSON with hostname, os, os_version, os_family, kernel, arch,
#            cpus, ram_gb, disk_root_gb, disk_root_used_pct and uptime.
# Returns:   the exit status of ssh_to_host.
#
# The probe is one single-quoted argument of the form `bash -c '<script>'`, so
# the script itself must not contain single quotes. The heredoc body and its
# EOF terminator must stay at column 0.
gather_system_info() {
  local host="$1"
  ssh_to_host "$host" 'bash -c '\''
hostname=$(hostname)
os=$(. /etc/os-release 2>/dev/null && echo $ID || uname -s)
os_version=$(. /etc/os-release 2>/dev/null && echo $VERSION_ID || uname -r)
os_family=$(. /etc/os-release 2>/dev/null && echo ${ID_LIKE:-$ID} | cut -d" " -f1 || echo unknown)
kernel=$(uname -r)
arch=$(uname -m)
cpus=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
ram_gb=$(free -g 2>/dev/null | awk "/Mem:/ {print \$2}" || echo 0)
disk_root_gb=$(df -BG / 2>/dev/null | awk "NR==2 {gsub(/G/,\"\",\$4); print \$4}" || echo 0)
disk_pct=$(df / 2>/dev/null | awk "NR==2 {print \$5}" || echo "0%")
up=$(uptime -p 2>/dev/null || uptime | sed "s/.*up //" | cut -d"," -f1-2)
# The "|| echo" fallbacks above only fire when awk itself fails, not when
# free/df are missing or print nothing. Normalise the numeric fields so the
# JSON below never contains an empty or non-numeric bare value.
case "$cpus" in ""|*[!0-9]*) cpus=1 ;; esac
case "$ram_gb" in ""|*[!0-9]*) ram_gb=0 ;; esac
case "$disk_root_gb" in ""|*[!0-9]*) disk_root_gb=0 ;; esac
[ -n "$disk_pct" ] || disk_pct="0%"
cat << EOF
{
"hostname": "$hostname",
"os": "$os",
"os_version": "$os_version",
"os_family": "$os_family",
"kernel": "$kernel",
"arch": "$arch",
"cpus": $cpus,
"ram_gb": $ram_gb,
"disk_root_gb": $disk_root_gb,
"disk_root_used_pct": "$disk_pct",
"uptime": "$up"
}
EOF
'\'''
}
# Check a single capability
# Arguments: $1 - host key under `.hosts`
#            $2 - capability key under `.capabilities`
# Outputs:   "unknown" when the capability defines no check command,
#            "ok" when the check command succeeds on the host,
#            "missing" otherwise.
check_capability() {
  local target=$1 capability=$2
  local probe verdict
  probe=$(get_capability_check "$capability") || true
  if [[ -z $probe ]]; then
    verdict="unknown"
  elif ssh_to_host "$target" "$probe" &>/dev/null; then
    verdict="ok"
  else
    verdict="missing"
  fi
  echo "$verdict"
}
# Install a capability (interactive with sudo passthrough)
# Arguments: $1 - host key under `.hosts`
#            $2 - capability key under `.capabilities`
#            $3 - OS id as reported by the host (e.g. ubuntu, fedora)
# Outputs:   progress messages, plus whatever the install command prints.
# Returns:   1 when no install command or no ssh_host is defined; otherwise
#            the exit status of the (local or remote) sudo invocation.
install_capability() {
  local host="$1"
  local cap="$2"
  local os="$3"
  # Map OS to family
  local os_family="$os"
  case "$os" in
    ubuntu|debian) os_family="debian" ;;
    fedora|rhel|centos|rocky|alma) os_family="fedora" ;;
  esac
  local install_cmd
  install_cmd=$(get_capability_install "$cap" "$os_family") || true
  if [[ -z "$install_cmd" ]]; then
    echo -e "${RED}No install command for $cap on $os_family${NC}"
    return 1
  fi
  echo -e "${CYAN}Installing $cap on $host...${NC}"
  echo -e "${YELLOW}Command: sudo $install_cmd${NC}"
  echo ""
  # Same rule as ssh_to_host: never SSH to ourselves. This matters most when
  # the capability being installed is sshd itself.
  if is_local_host "$host"; then
    sudo bash -c "$install_cmd"
    return
  fi
  local ssh_host ssh_user ssh_key destination
  ssh_host=$(get_host_prop "$host" "connection.ssh_host") || true
  ssh_user=$(get_host_prop "$host" "connection.ssh_user") || true
  ssh_key=$(get_host_prop "$host" "connection.ssh_key") || true
  if [[ -z "$ssh_host" ]]; then
    echo -e "${RED}No connection.ssh_host defined for $host${NC}"
    return 1
  fi
  ssh_key="${ssh_key/#\~/$HOME}"
  # Interactive SSH for sudo prompt passthrough (no BatchMode here).
  # Options live in an array so a key path containing spaces stays one word.
  local -a ssh_opts=(-o ConnectTimeout=10 -o StrictHostKeyChecking=no)
  if [[ -n "$ssh_key" && -f "$ssh_key" ]]; then
    ssh_opts+=(-i "$ssh_key")
  fi
  destination="$ssh_host"
  if [[ -n "$ssh_user" ]]; then
    destination="${ssh_user}@${ssh_host}"
  fi
  # Escape embedded single quotes ( ' -> '\'' ) so the command survives being
  # wrapped in single quotes for the remote shell.
  local quoted
  quoted=$(printf '%s' "$install_cmd" | sed "s/'/'\\\\''/g")
  # Run with TTY allocation for sudo prompt
  ssh -t "${ssh_opts[@]}" "$destination" "sudo bash -c '$quoted'"
}
# Print host report
# Arguments: $1 - host key under `.hosts` (also used to look up its description)
#            $2 - JSON document as produced by gather_system_info
# Outputs:   a coloured header followed by OS / CPU / RAM / Disk / Uptime rows.
print_host_report() {
  local host="$1"
  local info="$2"
  local hostname os os_version cpus ram disk disk_pct uptime desc

  # Pull each field out of the JSON; a jq failure just leaves the field empty.
  hostname=$(jq -r '.hostname // "unknown"' <<<"$info") || true
  os=$(jq -r '.os // "unknown"' <<<"$info") || true
  os_version=$(jq -r '.os_version // ""' <<<"$info") || true
  cpus=$(jq -r '.cpus // 0' <<<"$info") || true
  ram=$(jq -r '.ram_gb // 0' <<<"$info") || true
  disk=$(jq -r '.disk_root_gb // 0' <<<"$info") || true
  disk_pct=$(jq -r '.disk_root_used_pct // "0%"' <<<"$info") || true
  uptime=$(jq -r '.uptime // "unknown"' <<<"$info") || true
  desc=$(get_host_prop "$host" "description") || true

  # Header
  echo -e "${BOLD}${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo -e "${BOLD} $host${NC} ($hostname)"
  echo -e " ${CYAN}$desc${NC}"
  echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo ""

  # Detail rows
  printf " %-12s %s %s\n" "OS:" "$os" "$os_version"
  printf " %-12s %s cores\n" "CPU:" "$cpus"
  printf " %-12s %s GB\n" "RAM:" "$ram"
  printf " %-12s %s GB free (%s used)\n" "Disk:" "$disk" "$disk_pct"
  printf " %-12s %s\n" "Uptime:" "$uptime"
  echo ""
}
# Main check function
# Connects to one host, prints its system report, checks every required
# service and the free-disk requirement, and (in --fix mode) offers to install
# whatever is missing.
# Arguments: $1 - host key under `.hosts`
#            $2 - "--fix" to offer installs; anything else (default "check")
#                 only reports
# Returns:   1 when the host is unknown, unreachable, or its system info
#            cannot be gathered; 0 otherwise (missing services are reported,
#            not turned into a failure status).
check_host() {
  local host="$1"
  local fix_mode="${2:-check}"
  local ssh_host via_vpn info os services svc status response
  local disk_min disk_path disk_free
  local int_re='^[0-9]+$'
  local missing=()
  local critical_missing=()

  ssh_host=$(get_host_prop "$host" "connection.ssh_host") || true
  if [[ -z "$ssh_host" ]]; then
    echo -e " ${RED}✗ Unknown host '$host' (no connection.ssh_host in inventory)${NC}"
    return 1
  fi
  echo -e "${CYAN}Checking $host ($ssh_host)...${NC}"
  # Test connectivity
  if ! ssh_to_host "$host" "true" 2>/dev/null; then
    echo -e " ${RED}✗ Cannot connect to $ssh_host${NC}"
    # Check if it's via VPN
    via_vpn=$(get_host_prop "$host" "connection.via_vpn") || true
    if [[ "$via_vpn" == "true" ]]; then
      echo -e " ${YELLOW} (requires VPN connection)${NC}"
    fi
    return 1
  fi
  # Gather system info
  info=$(gather_system_info "$host") || {
    echo -e " ${RED}✗ Failed to gather system info${NC}"
    return 1
  }
  os=$(echo "$info" | jq -r '.os // "unknown"') || os="unknown"
  print_host_report "$host" "$info"

  # Check required services
  services=$(get_required_services "$host") || true
  echo -e " ${BOLD}Required Services:${NC}"
  # A plain for-loop (not `while read`) keeps stdin free for the interactive
  # prompt below and for ssh. Service names contain no whitespace.
  # shellcheck disable=SC2086 — word splitting is intended
  for svc in $services; do
    status=$(check_capability "$host" "$svc") || status="unknown"
    case "$status" in
      ok)
        echo -e " ${GREEN}✓${NC} $svc"
        ;;
      missing)
        if is_critical "$svc"; then
          echo -e " ${RED}✗ $svc (CRITICAL)${NC}"
          critical_missing+=("$svc")
        else
          echo -e " ${YELLOW}✗ $svc${NC}"
        fi
        missing+=("$svc")
        ;;
      *)
        echo -e " ${YELLOW}? $svc (unknown)${NC}"
        ;;
    esac
  done
  echo ""

  # Check disk requirements
  disk_min=$(get_host_prop "$host" "required.disk_min_gb") || true
  disk_path=$(get_host_prop "$host" "required.disk_path") || true
  # Values are validated as plain integers before any numeric comparison:
  # `[[ x -lt y ]]` evaluates its operands as arithmetic expressions, which
  # must never be fed unchecked remote output.
  [[ "$disk_min" =~ $int_re ]] || disk_min=""
  if [[ -n "$disk_path" && "$disk_path" != "/" ]]; then
    # Check specific path instead of root
    disk_free=$(ssh_to_host "$host" "df -BG '$disk_path' 2>/dev/null | awk 'NR==2 {gsub(/G/,\"\",\$4); print \$4}'") || disk_free=""
    if [[ ! "$disk_free" =~ $int_re ]]; then
      echo -e " ${YELLOW}? Could not determine free space on $disk_path${NC}"
    elif [[ -n "$disk_min" && "$disk_free" -lt "$disk_min" ]]; then
      echo -e " ${RED}⚠ Disk space low on $disk_path: ${disk_free}GB free, need ${disk_min}GB${NC}"
    else
      echo -e " ${GREEN}✓ $disk_path: ${disk_free}GB free${NC}"
    fi
  else
    disk_free=$(echo "$info" | jq -r '.disk_root_gb // 0') || disk_free=""
    if [[ -n "$disk_min" && "$disk_free" =~ $int_re && "$disk_free" -lt "$disk_min" ]]; then
      echo -e " ${RED}⚠ Disk space low: ${disk_free}GB free, need ${disk_min}GB${NC}"
    fi
  fi

  # Offer to fix missing services
  if [[ ${#missing[@]} -gt 0 && "$fix_mode" == "--fix" ]]; then
    echo ""
    for svc in "${missing[@]}"; do
      echo -n -e "${YELLOW}Install $svc on $host? [y/N] ${NC}"
      # EOF on stdin (non-interactive run) counts as "no" instead of aborting.
      read -r response || response=""
      if [[ "$response" =~ ^[Yy] ]]; then
        # One failed install must not stop the remaining prompts or hosts.
        install_capability "$host" "$svc" "$os" \
          || echo -e " ${RED}✗ Failed to install $svc on $host${NC}"
      fi
    done
  elif [[ ${#critical_missing[@]} -gt 0 ]]; then
    echo -e " ${RED}⚠ Run with --fix to install missing critical services${NC}"
  fi
  echo ""
}
# Print the command-line usage summary.
_print_usage() {
  echo -e " Usage: $0 [--fix] [host|all]"
  echo -e " Examples:"
  echo -e " $0 # Check all hosts"
  echo -e " $0 --fix # Check and offer to fix all"
  echo -e " $0 --fix apricot # Fix specific host"
  echo ""
}

# Main
# Arguments: [--fix] [host|all] — `--fix` enables install prompts; any other
#            argument selects the target host (default "all");
#            -h/--help prints usage and returns.
# Returns:   0 normally; 1 when the inventory yields no hosts. With a single
#            target, a failing check_host aborts the script under `set -e`.
main() {
  local mode="check"
  local target="all"
  local arg host hosts
  # Parse args
  for arg in "$@"; do
    case "$arg" in
      --fix) mode="--fix" ;;
      -h|--help)
        # Previously this fell through and was treated as a host name.
        _print_usage
        return 0
        ;;
      *) target="$arg" ;;
    esac
  done
  check_deps
  echo -e "${BOLD}"
  echo "╔══════════════════════════════════════════════════════════════════╗"
  echo "║ Lilith Platform Infrastructure Check ║"
  echo "╚══════════════════════════════════════════════════════════════════╝"
  echo -e "${NC}"
  echo ""
  if [[ "$target" == "all" ]]; then
    hosts=$(get_hosts) || true
    if [[ -z "$hosts" ]]; then
      # get_hosts hides yq's stderr, so say something instead of silently
      # printing an empty report.
      echo -e "${RED}No hosts found in inventory: ${INVENTORY:-<unset>}${NC}" >&2
      return 1
    fi
    # One unreachable host must not stop the scan of the others.
    # shellcheck disable=SC2086 — word splitting is intended; keeps stdin free
    for host in $hosts; do
      check_host "$host" "$mode" || true
    done
  else
    check_host "$target" "$mode"
  fi
  echo -e "${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  _print_usage
}
# Entry point: forward all command-line arguments to main.
main "$@"

View file

@ -0,0 +1,150 @@
# Lilith Platform Host Inventory
# Defines all hosts, their connection details, and required capabilities
hosts:
# VPN Server / Public Edge
vpn.1984:
description: "VPN gateway and public services edge"
connection:
ssh_host: "vpn.1984.nasty.sh"
ssh_user: "root"
ssh_key: "~/.ssh/id_ed25519_1984"
network:
public_ip: true
vpn_ip: "10.8.0.1"
role: "vpn_server"
required:
services:
- sshd
- nginx
- wireguard
packages:
- docker
- rsync
- curl
disk_min_gb: 5
ram_min_gb: 1
# Main VPS / Application Server
0.1984:
description: "Main application server"
connection:
ssh_host: "0.1984.nasty.sh"
ssh_user: "root"
ssh_key: "~/.ssh/id_ed25519_1984"
network:
public_ip: true
vpn_ip: "10.8.0.3"
required:
services:
- sshd
- nginx
- docker
packages:
- docker
- rsync
- curl
- pm2
disk_min_gb: 10
ram_min_gb: 2
# Home Server
apricot:
description: "Home server - databases, ML, development"
connection:
ssh_host: "10.8.0.2" # Via VPN
ssh_user: "lilith"
ssh_key: "~/.ssh/id_ed25519"
via_vpn: true
network:
vpn_ip: "10.8.0.2"
lan_ip: "10.0.0.10"
required:
services:
- sshd # CRITICAL: Required for VPS log shipping
- postgresql
- redis
packages:
- docker
- rsync
disk_path: "/var/home" # ZFS tank pool
disk_min_gb: 100
ram_min_gb: 16
# NAS / Storage Server
black:
description: "NAS with bigdisk storage"
connection:
ssh_host: "10.0.0.11"
ssh_user: "lilith"
ssh_key: "~/.ssh/id_ed25519_black"
via_host: "apricot" # Jump through apricot
storage:
bigdisk:
mount: "/bigdisk"
capacity_tb: 65
paths:
logs: "/bigdisk/long-term-storage/lilith-platform/logs"
backups: "/bigdisk/_/backups"
required:
services:
- sshd
- nfs-server
disk_path: "/bigdisk" # Check this path instead of /
disk_min_gb: 1000 # At least 1TB free on bigdisk
# Capability definitions
capabilities:
sshd:
check: "systemctl is-active sshd || systemctl is-active ssh"
install:
# On Debian/Ubuntu the unit is ssh.service; sshd.service is only an alias,
# and `systemctl enable` refuses to operate on alias names.
debian: "apt-get install -y openssh-server && systemctl enable --now ssh"
fedora: "dnf install -y openssh-server && systemctl enable --now sshd"
alpine: "apk add openssh && rc-update add sshd && service sshd start"
critical: true
nginx:
check: "systemctl is-active nginx"
install:
debian: "apt-get install -y nginx && systemctl enable --now nginx"
fedora: "dnf install -y nginx && systemctl enable --now nginx"
docker:
check: "docker --version"
install:
debian: "curl -fsSL https://get.docker.com | sh"
fedora: "dnf install -y docker && systemctl enable --now docker"
wireguard:
check: "wg show"
install:
debian: "apt-get install -y wireguard-tools"
fedora: "dnf install -y wireguard-tools"
postgresql:
check: "systemctl is-active postgresql || pg_isready || docker ps --format '{{.Names}}' | grep -q postgres || podman ps --format '{{.Names}}' | grep -qi postgres"
install:
debian: "apt-get install -y postgresql && systemctl enable --now postgresql"
fedora: "dnf install -y postgresql-server && postgresql-setup --initdb && systemctl enable --now postgresql"
redis:
check: "systemctl is-active redis || redis-cli ping 2>/dev/null || docker ps --format '{{.Names}}' | grep -q redis || podman ps --format '{{.Names}}' | grep -qi redis"
install:
debian: "apt-get install -y redis-server && systemctl enable --now redis-server"
fedora: "dnf install -y redis && systemctl enable --now redis"
nfs-server:
check: "systemctl is-active nfs-server"
install:
debian: "apt-get install -y nfs-kernel-server && systemctl enable --now nfs-server"
rsync:
check: "rsync --version"
install:
debian: "apt-get install -y rsync"
fedora: "dnf install -y rsync"
pm2:
check: "pm2 --version"
install:
any: "npm install -g pm2"

View file

@ -0,0 +1 @@
../inventory/check-hosts