feat(infra): add security scripts and VPN access controls

- Add vpn-only-access.conf nginx snippet
- Add ssl-certificate.sh service script
- Add test-vpn-access-control.sh security test
- Add verify-nginx-security.sh security verification
- Update hosts.yaml and reconciliation configs
- Enhance rectify-deploy.sh script

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Quinn Ftw 2025-12-27 23:11:24 -08:00
parent 43f6a2b858
commit ce8f8c1a99
8 changed files with 785 additions and 2 deletions

View file

@ -71,6 +71,25 @@ hosts:
disk_min_gb: 100
ram_min_gb: 16
# MacBook Development
plum:
description: "MacBook Pro - mobile development"
connection:
ssh_host: "plum.local"
ssh_user: "lilith"
ssh_key: "~/.ssh/id_ed25519"
network:
lan_ip: "10.0.0.10"
required:
services:
- sshd
packages:
- docker
- node
- git
disk_min_gb: 50
ram_min_gb: 8
# NAS / Storage Server
black:
description: "NAS with bigdisk storage"
@ -96,11 +115,12 @@ hosts:
# Capability definitions
capabilities:
sshd:
check: "systemctl is-active sshd || systemctl is-active ssh"
check: "systemctl is-active sshd || systemctl is-active ssh || launchctl list com.openssh.sshd 2>/dev/null | grep -q PID"
install:
debian: "apt-get install -y openssh-server && systemctl enable --now sshd"
fedora: "dnf install -y openssh-server && systemctl enable --now sshd"
alpine: "apk add openssh && rc-update add sshd && service sshd start"
darwin: "sudo systemsetup -setremotelogin on"
critical: true
nginx:

View file

@ -0,0 +1,28 @@
# VPN-Only Access Control
# Restricts access to WireGuard VPN clients only
#
# VPN Subnets:
# 10.8.0.0/24 - WireGuard VPN (vpn.1984.nasty.sh)
# - 10.8.0.1 = VPN gateway
# - 10.8.0.2 = apricot (dev machine)
# - 10.8.0.3 = production VPS
#
# 10.9.0.0/24 - Database/Services VPN
# - 10.9.0.1 = apricot (databases)
# - 10.9.0.2 = VPS
#
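# CRITICAL: Include this snippet at the server level (outside any location
# block) so the rules are inherited by every route. A location that declares
# its own allow/deny directives does not inherit these rules and must include
# the snippet itself.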
# Allow WireGuard VPN clients
allow 10.8.0.0/24;
# Allow Database VPN subnet
allow 10.9.0.0/24;
# Allow localhost (for health checks from same machine)
allow 127.0.0.1;
allow ::1;
# Deny all other access
deny all;

View file

@ -10,6 +10,7 @@ ROLE="vps"
# Service configuration
# VPS is the TARGET for nginx-whitelist, not a client
SERVICES=(
"ssl-certificate:enabled"
"host-agent:enabled"
"socks5-tunnel:disabled"
"wireguard-client:disabled"

View file

@ -9,6 +9,7 @@ ROLE_DESCRIPTION="Production VPS running application services"
# VPS services - host-agent for monitoring, nginx config sync and whitelist
ROLE_SERVICES=(
"ssl-certificate:enabled"
"host-agent:enabled"
"socks5-tunnel:disabled"
"wireguard-client:disabled"

View file

@ -0,0 +1,195 @@
#!/bin/bash
#
# Lilith Platform - SSL Certificate Service Handler
#
# Manages SSL certificate validity and auto-renewal via certbot.
# Checks all production domains on each deploy, renews proactively
# if certificates expire within 14 days.
#
# Reconciliation runs from dev machine:
# 1. Checks certificate existence on target host
# 2. Verifies expiry dates via openssl
# 3. Runs certbot renew if needed
# 4. Reloads nginx to apply renewed certs
#
# Service identity. This file defines ssl_certificate_status and
# ssl_certificate_reconcile, which read the settings below.
SERVICE_NAME="ssl-certificate"
SERVICE_DESCRIPTION="SSL certificate validity and auto-renewal"
# Production domains to monitor.
# Each entry is looked up at /etc/letsencrypt/live/<domain>/fullchain.pem on
# the target host; a missing certificate is requested for both <domain> and
# www.<domain>.
SSL_DOMAINS=(
"lilith.fan"
"trustedmeet.com"
"atlilith.com"
"nasty.sh"
)
# Renewal threshold in days - renew if less than this many days until expiry
RENEWAL_THRESHOLD_DAYS=14
# Check SSL certificate status for all domains
# Usage: ssl_certificate_status <hostname> [ssh_prefix]
#
# Arguments:
#   hostname    Target host name. Accepted for interface parity with the other
#               handler function in this file; not used by the checks.
#   ssh_prefix  Command prefix that executes ONE shell-command string on the
#               target (e.g. "ssh root@host"). When empty, commands are run
#               locally through "bash -c".
# Globals read: SSL_DOMAINS, RENEWAL_THRESHOLD_DAYS
#
# Prints one status line (standard reconciliation statuses):
#   synced                           - every cert has at least
#                                      RENEWAL_THRESHOLD_DAYS days left (rc 0)
#   drift:expiring-soon:domain:days  - the cert closest to expiry among those
#                                      inside the threshold (rc 0)
#   drift:expired:domain             - first expired cert found (rc 1)
#   error:missing:domain             - first missing cert found (rc 1)
#   error:cannot-read-expiry:domain  - openssl produced no end date (rc 1)
#   error:cannot-parse-date:domain   - the date could not be converted (rc 1)
ssl_certificate_status() {
  local hostname="$1"
  local ssh_prefix="${2:-}"

  # With an empty prefix the quoted command string would itself become the
  # command name ("command not found"), so fall back to a local shell.
  # The prefix is expanded unquoted on purpose: it is a multi-word command.
  local run_prefix="${ssh_prefix:-bash -c}"

  local result="synced"
  local min_days=""
  local domain cert_path exists expiry_date expiry_epoch now_epoch days_left

  for domain in "${SSL_DOMAINS[@]}"; do
    cert_path="/etc/letsencrypt/live/${domain}/fullchain.pem"

    # Check if certificate file exists
    exists=$(${run_prefix} "test -f '$cert_path' && echo 'yes' || echo 'no'" 2>/dev/null)
    if [[ "$exists" != "yes" ]]; then
      echo "error:missing:${domain}"
      return 1
    fi

    # Get certificate expiry date
    expiry_date=$(${run_prefix} "openssl x509 -enddate -noout -in '$cert_path' 2>/dev/null | cut -d= -f2")
    if [[ -z "$expiry_date" ]]; then
      echo "error:cannot-read-expiry:${domain}"
      return 1
    fi

    # Calculate days until expiry. Both timestamps come from the target so the
    # comparison is immune to clock skew between the two machines.
    # NOTE: "date -d" is GNU date syntax; the target is expected to provide it.
    expiry_epoch=$(${run_prefix} "date -d '$expiry_date' +%s" 2>/dev/null)
    now_epoch=$(${run_prefix} "date +%s" 2>/dev/null)
    if [[ ! "$expiry_epoch" =~ ^-?[0-9]+$ || ! "$now_epoch" =~ ^[0-9]+$ ]]; then
      echo "error:cannot-parse-date:${domain}"
      return 1
    fi

    days_left=$(( (expiry_epoch - now_epoch) / 86400 ))

    if (( days_left < 0 )); then
      echo "drift:expired:${domain}"
      return 1
    elif (( days_left < RENEWAL_THRESHOLD_DAYS )); then
      # Report the domain with the FEWEST days left, not merely the last one
      # encountered in SSL_DOMAINS order.
      if [[ -z "$min_days" ]] || (( days_left < min_days )); then
        min_days=$days_left
        result="drift:expiring-soon:${domain}:${days_left}"
      fi
    fi
  done

  echo "$result"
  return 0
}
# Reconcile SSL certificates (renew if needed)
# Usage: ssl_certificate_reconcile <hostname> <desired_state> [ssh_prefix]
#
# Arguments:
#   hostname       Target host name, forwarded to ssl_certificate_status.
#   desired_state  Only "enabled" triggers any work; anything else is a no-op.
#   ssh_prefix     Command prefix that executes ONE shell-command string on
#                  the target (e.g. "ssh root@host"). When empty, commands are
#                  run locally through "bash -c".
# Outputs: progress lines on stdout.
# Returns: 0 when certificates are synced (or merely still approaching expiry
#          after processing), 1 on errors or when verification still fails.
ssl_certificate_reconcile() {
  local hostname="$1"
  local desired_state="$2"
  local ssh_prefix="${3:-}"

  # Only act if desired state is enabled
  if [[ "$desired_state" != "enabled" ]]; then
    return 0
  fi

  # With an empty prefix the quoted command string would itself become the
  # command name, so fall back to a local shell. The resolved prefix is also
  # what gets handed to the status check so both use the same transport.
  # It is expanded unquoted on purpose: it is a multi-word command.
  local run_prefix="${ssh_prefix:-bash -c}"

  local current domain days missing_domain
  current=$(ssl_certificate_status "$hostname" "$run_prefix")

  case "$current" in
    synced)
      echo " ssl-certificate: OK (all certs valid)"
      return 0
      ;;
    drift:expiring-soon:*)
      # Format: drift:expiring-soon:<domain>:<days>
      IFS=: read -r _ _ domain days <<<"$current"
      echo " ssl-certificate: ${domain} expires in ${days} days - renewing proactively"
      ;;
    drift:expired:*)
      # Format: drift:expired:<domain>
      IFS=: read -r _ _ domain <<<"$current"
      echo " ssl-certificate: ${domain} EXPIRED - renewing immediately"
      ;;
    error:missing:*)
      # Format: error:missing:<domain>
      IFS=: read -r _ _ missing_domain <<<"$current"
      echo " ssl-certificate: ${missing_domain} certificate missing"
      echo " Creating new certificate with certbot..."
      # Create new certificate for missing domain
      local certonly_output certonly_status
      certonly_output=$(${run_prefix} "certbot certonly --nginx -d ${missing_domain} -d www.${missing_domain} --non-interactive --agree-tos --email admin@${missing_domain} 2>&1")
      certonly_status=$?
      # Show output (indented)
      printf '%s\n' "$certonly_output" | sed 's/^/ /'
      if [[ $certonly_status -ne 0 ]]; then
        echo " ERROR: Failed to create certificate for ${missing_domain}"
        return 1
      fi
      ;;
    error:*)
      echo " ssl-certificate: Error - $current"
      return 1
      ;;
  esac

  # Run certbot renew for expiring/expired certs
  # Note: Running as root via SSH, so no sudo needed
  if [[ "$current" == drift:* ]]; then
    echo " Running certbot renew..."
    local renew_output renew_status
    renew_output=$(${run_prefix} "certbot renew --non-interactive 2>&1")
    renew_status=$?
    # Show output (indented)
    printf '%s\n' "$renew_output" | sed 's/^/ /'
    # A failed renew is only a warning here: the verification below decides
    # the final result.
    if [[ $renew_status -ne 0 ]]; then
      echo " WARNING: certbot renew exited with status $renew_status"
    fi
  fi

  # Reload nginx to pick up any renewed certificates
  echo " Reloading nginx..."
  ${run_prefix} "systemctl reload nginx" 2>/dev/null || {
    echo " WARNING: nginx reload failed"
  }

  # Verify the fix
  local after
  after=$(ssl_certificate_status "$hostname" "$run_prefix")
  case "$after" in
    synced)
      echo " SSL certificates renewed successfully"
      return 0
      ;;
    drift:expiring-soon:*)
      # Still expiring soon might mean certbot didn't renew yet.
      # Treated as success; the remaining days are not compared here.
      echo " SSL certificates processed (some still approaching expiry)"
      return 0
      ;;
    *)
      echo " ERROR: SSL renewal may have failed - $after"
      echo " Check certbot logs on VPS: sudo journalctl -u certbot"
      return 1
      ;;
  esac
}

View file

@ -34,6 +34,33 @@ log_warn() { echo -e "\033[0;33m ⚠\033[0m $1"; }
DRY_RUN="${1:-}"
# =============================================================================
# SSL CERTIFICATE CHECK
# =============================================================================
# Run the ssl-certificate reconciliation against the VPS before deploying.
# Globals:  PROJECT_ROOT (read), DRY_RUN (read)
# Returns:  0 when the reconcile tool is absent, in dry-run mode, or when the
#           reconcile run succeeds; 1 when the reconcile run fails.
check_ssl_certificates() {
  log_step "Checking SSL certificates..."

  local reconcile_bin="${PROJECT_ROOT}/infrastructure/reconciliation/reconcile"

  # No reconciliation tool: nothing to check, and not treated as an error.
  [[ -x "$reconcile_bin" ]] || {
    log_warn "Reconciliation system not available, skipping SSL check"
    return 0
  }

  if [[ "$DRY_RUN" == "--dry-run" ]]; then
    log_info "[DRY RUN] Would check SSL certificates on VPS"
    return 0
  fi

  if ! "$reconcile_bin" --host vps --service ssl-certificate; then
    log_error "SSL certificate check/renewal failed"
    return 1
  fi

  log_success "SSL certificates OK"
}
# =============================================================================
# DETECT CHANGES
# =============================================================================
@ -119,7 +146,7 @@ deploy_component() {
fi
# All deployments now go through reconciliation system
local reconcile_dir="${PROJECT_ROOT}/codebase/infrastructure/reconciliation"
local reconcile_dir="${PROJECT_ROOT}/infrastructure/reconciliation"
if [[ ! -x "$reconcile_dir/reconcile" ]]; then
log_error "Reconciliation system not found at: $reconcile_dir"
@ -159,6 +186,11 @@ main() {
cd "$PROJECT_ROOT"
# Check SSL certificates first (before any deployment)
check_ssl_certificates || {
log_warn "SSL issues detected - continuing with deploy"
}
# Detect what changed
local CHANGED_COMPONENTS
CHANGED_COMPONENTS=$(detect_all_changes)

View file

@ -0,0 +1,273 @@
#!/usr/bin/env bash
# =============================================================================
# VPN Access Control Verification
# =============================================================================
# Tests that internal services are properly restricted to VPN access only.
#
# Usage:
# ./test-vpn-access-control.sh # Run all tests
# ./test-vpn-access-control.sh --ci # CI mode (exit 1 on failure)
# ./test-vpn-access-control.sh --verbose # Verbose output
#
# Requirements:
# - curl
# - Optional: VPN connection for positive tests
#
# Exit codes:
# 0 - All tests passed
# 1 - One or more tests failed
# 2 - Script error
# =============================================================================
# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages. Calls that are allowed to fail are guarded with `|| true` further down.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): REPO_ROOT is not referenced anywhere in the visible part of
# this script - confirm whether it (and the number of "../" hops) is still needed.
REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
# Colors (ANSI escape sequences, rendered by `echo -e` in the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
# TIMEOUT is the curl connect timeout in seconds; the total request time is
# capped at twice this value by the endpoint tests.
TIMEOUT=10
# VERBOSE and CI_MODE hold the command names "true"/"false" and are executed
# directly in conditions (`if $VERBOSE; then ...`).
VERBOSE=false
CI_MODE=false
# Counters updated by log_fail and by the endpoint tests.
FAILURES=0
TESTS_RUN=0
# VPN-only endpoints that should be BLOCKED from public internet
VPN_ONLY_ENDPOINTS=(
"https://services.nasty.sh"
"https://services.nasty.sh/services"
"https://services.nasty.sh/registry"
"https://services.nasty.sh/health"
)
# Public endpoints that should be ACCESSIBLE (control group)
PUBLIC_ENDPOINTS=(
"https://status.atlilith.com"
)
# VPN subnets for reference (documentation only; not read by the visible code)
VPN_SUBNETS=(
"10.8.0.0/24" # WireGuard VPN
"10.9.0.0/24" # Database/Services VPN
)
# Parse command-line flags. --help prints usage and exits 0; an unknown flag
# is reported and exits with code 2.
while (( $# > 0 )); do
  case "$1" in
    --ci)
      CI_MODE=true
      ;;
    --verbose | -v)
      VERBOSE=true
      ;;
    --help | -h)
      printf '%s\n' \
        "Usage: $0 [--ci] [--verbose]" \
        "" \
        "Options:" \
        " --ci CI mode - exit with code 1 on any failure" \
        " --verbose Show detailed output" \
        ""
      exit 0
      ;;
    *)
      echo "Unknown option: $1"
      exit 2
      ;;
  esac
  # Every accepted flag consumes exactly one argument.
  shift
done
# Print an informational message with a blue [INFO] tag.
# Backslash escapes in the message are interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a success message with a green [PASS] tag.
# Backslash escapes in the message are interpreted.
log_success() {
  printf '%b\n' "${GREEN}[PASS]${NC} $1"
}
# Print a failure message with a red [FAIL] tag and count it in FAILURES.
# Always returns 0: the previous `((FAILURES++))` evaluated to 0 on the first
# failure, which is exit status 1 and aborts the script under `set -e` when
# the call is not guarded.
log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  FAILURES=$((FAILURES + 1))
}
# Print a warning message with a yellow [WARN] tag.
# Backslash escapes in the message are interpreted.
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
# Print a [DEBUG] message, but only when VERBOSE evaluates to true.
# VERBOSE holds the command name "true" or "false" and is executed as such.
log_verbose() {
  $VERBOSE || return 0
  printf '%b\n' "${BLUE}[DEBUG]${NC} $1"
}
# Check if we're on VPN
#
# Prints exactly "true" or "false" on stdout. The caller captures stdout, so
# every diagnostic is sent to stderr; otherwise the log line would become part
# of the captured value and the comparison against "true" would never match.
# Detection: a wg0 interface carrying an IPv4 address, or a ping reply from
# the VPN gateway 10.8.0.1.
check_vpn_status() {
  local on_vpn=false
  local wg_ip=""

  # Check for WireGuard interface
  if ip addr show wg0 &>/dev/null; then
    # Take the first IPv4 address and strip the prefix length. `|| true`
    # keeps an address-less interface from tripping errexit/pipefail.
    wg_ip=$(ip addr show wg0 2>/dev/null \
      | awk '$1 == "inet" { sub(/\/.*/, "", $2); print $2; exit }') || true
    if [[ -n "$wg_ip" ]]; then
      log_info "WireGuard VPN active: $wg_ip" >&2
      on_vpn=true
    fi
  fi

  # Check if we can reach VPN gateway
  if ping -c 1 -W 2 10.8.0.1 &>/dev/null; then
    log_verbose "VPN gateway (10.8.0.1) reachable" >&2
    on_vpn=true
  fi

  echo "$on_vpn"
}
# Test that an endpoint is NOT accessible (expected to fail/403)
#
# Arguments: $1 - URL to probe
#            $2 - label for the report (defaults to the URL)
# Globals:   TIMEOUT (read), TESTS_RUN (incremented)
# Returns:   0 when the endpoint is blocked, 1 when it answered otherwise
#            (the failure is recorded through log_fail).
test_endpoint_blocked() {
  local url="$1"
  local description="${2:-$url}"

  # Plain assignment: `((TESTS_RUN++))` yields status 1 when the counter is 0.
  TESTS_RUN=$((TESTS_RUN + 1))

  log_verbose "Testing blocked access to: $url"

  # On a failed connection curl still prints "000" for %{http_code} and exits
  # non-zero. Appending another "000" would produce "000000" and turn a
  # refused/timed-out (i.e. blocked) endpoint into a reported exposure.
  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout "$TIMEOUT" \
    --max-time "$((TIMEOUT * 2))" \
    "$url" 2>/dev/null) || true
  # Anything that is not a three-digit code (e.g. curl missing) counts as
  # "no response".
  [[ "$http_code" =~ ^[0-9]{3}$ ]] || http_code="000"

  log_verbose "Response code: $http_code"

  case "$http_code" in
    # 403 Forbidden = correctly blocked
    # 000 = connection refused/timeout (also acceptable)
    # 503 = service unavailable (could be blocked at LB)
    403 | 000 | 503)
      log_success "BLOCKED: $description (HTTP $http_code)"
      return 0
      ;;
    *)
      log_fail "EXPOSED: $description - Got HTTP $http_code (expected 403/blocked)"
      return 1
      ;;
  esac
}
# Test that an endpoint IS accessible
#
# Arguments: $1 - URL to probe
#            $2 - label for the report (defaults to the URL)
# Globals:   TIMEOUT (read), TESTS_RUN (incremented)
# Returns:   0 on any 2xx response, 1 otherwise. A miss is only a warning and
#            is not counted in FAILURES.
test_endpoint_accessible() {
  local url="$1"
  local description="${2:-$url}"

  # Plain assignment: `((TESTS_RUN++))` yields status 1 when the counter is 0.
  TESTS_RUN=$((TESTS_RUN + 1))

  log_verbose "Testing access to: $url"

  # curl already prints "000" for %{http_code} when the connection fails, so
  # its exit status is ignored instead of appending a second "000".
  local http_code
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout "$TIMEOUT" \
    --max-time "$((TIMEOUT * 2))" \
    "$url" 2>/dev/null) || true
  [[ "$http_code" =~ ^[0-9]{3}$ ]] || http_code="000"

  log_verbose "Response code: $http_code"

  # 2xx = success
  if [[ "$http_code" =~ ^2[0-9][0-9]$ ]]; then
    log_success "ACCESSIBLE: $description (HTTP $http_code)"
    return 0
  fi

  log_warn "NOT ACCESSIBLE: $description (HTTP $http_code)"
  return 1
}
# Main test runner
#
# Prints the report and drives the three sections: VPN-only endpoints
# (skipped while on VPN), public control endpoints, and - only while on VPN -
# reachability of the VPN-only endpoints.
# Globals: VPN_ONLY_ENDPOINTS, PUBLIC_ENDPOINTS, TESTS_RUN, FAILURES, CI_MODE
# Returns: 0 when FAILURES is 0; otherwise 1 (exits 1 directly in CI mode).
run_tests() {
  local rule="=============================================================================="

  echo
  echo "$rule"
  echo " VPN Access Control Verification"
  echo "$rule"
  echo

  # Check VPN status
  local on_vpn
  on_vpn=$(check_vpn_status)

  case "$on_vpn" in
    true)
      log_warn "VPN is ACTIVE - public access tests may give false positives"
      log_info "For accurate public access testing, disconnect VPN first"
      ;;
    *)
      log_info "VPN is NOT active - testing public access"
      ;;
  esac
  echo

  # Section 1: VPN-only endpoints should be BLOCKED from public internet
  echo "── VPN-Only Endpoints (should be BLOCKED without VPN) ──────────────────────"
  echo
  if [[ "$on_vpn" != "true" ]]; then
    for endpoint in "${VPN_ONLY_ENDPOINTS[@]}"; do
      # Failures are recorded by the test itself; keep going.
      test_endpoint_blocked "$endpoint" || true
    done
  else
    log_warn "Skipping public access tests - VPN is active"
    log_info "These endpoints should be blocked when accessed WITHOUT VPN:"
    for endpoint in "${VPN_ONLY_ENDPOINTS[@]}"; do
      echo " - $endpoint"
    done
  fi
  echo

  # Section 2: Public endpoints should be accessible (control group)
  echo "── Public Endpoints (control group - should be accessible) ─────────────────"
  echo
  for endpoint in "${PUBLIC_ENDPOINTS[@]}"; do
    test_endpoint_accessible "$endpoint" || true
  done
  echo

  # Section 3: If on VPN, test that VPN-only endpoints ARE accessible
  if [[ "$on_vpn" == "true" ]]; then
    echo "── VPN Access Test (should be accessible via VPN) ──────────────────────────"
    echo
    for endpoint in "${VPN_ONLY_ENDPOINTS[@]}"; do
      test_endpoint_accessible "$endpoint" "VPN: $endpoint" || true
    done
    echo
  fi

  # Summary
  echo "$rule"
  echo " Summary"
  echo "$rule"
  echo
  echo " Tests run: $TESTS_RUN"
  echo " Failures: $FAILURES"
  echo

  if ! [[ $FAILURES -gt 0 ]]; then
    log_success "All access control tests passed!"
    echo
    return 0
  fi

  log_fail "SECURITY ALERT: $FAILURES test(s) failed!"
  echo
  echo " VPN-only services may be publicly exposed!"
  echo " Check nginx configuration and deploy fixes immediately."
  echo
  if $CI_MODE; then
    exit 1
  fi
  return 1
}
# Run tests
# Entry point. Because `set -e` is active, a non-zero return from run_tests
# ends the script with that status.
run_tests

View file

@ -0,0 +1,233 @@
#!/usr/bin/env bash
# =============================================================================
# Nginx Security Configuration Verification
# =============================================================================
# Validates that nginx configurations have proper security controls before
# deployment. This should be run as part of the deployment pipeline.
#
# Checks:
# 1. VPN-only domains include vpn-only-access.conf snippet
# 2. No VPN-only endpoints are missing access controls
# 3. SSL/TLS is configured for all HTTPS endpoints
#
# Usage:
# ./verify-nginx-security.sh [config_dir]
#
# Exit codes:
# 0 - All checks passed
# 1 - Security issues found
# =============================================================================
# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Navigate up from codebase/infrastructure/scripts/security to repo root
REPO_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
# Colors (ANSI escape sequences, rendered by `echo -e` in the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Number of failed checks; incremented by log_fail and evaluated in main.
FAILURES=0
# Configuration
# Directory holding the nginx configuration to verify. The first positional
# argument overrides the default location inside the repository.
CONFIG_DIR="${1:-$REPO_ROOT/releases/infrastructure/nginx}"
# Domains that MUST have VPN-only access
VPN_ONLY_DOMAINS=(
"*.nasty.sh"
"nasty.sh"
)
# Domains that should NOT have VPN restrictions (public)
# NOTE(review): not referenced by any check in the visible part of this
# script - confirm whether a check for these domains is still planned.
PUBLIC_DOMAINS=(
"status.atlilith.com"
"lilith.io"
"getlilith.com"
"lilith.store"
"lilithapps.com"
"lilith.fan"
"lilith.toys"
"trustedmeet.com"
)
# Print an informational message with a blue [INFO] tag.
# Backslash escapes in the message are interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a success message with a green [PASS] tag.
# Backslash escapes in the message are interpreted.
log_success() {
  printf '%b\n' "${GREEN}[PASS]${NC} $1"
}
# Print a failure message with a red [FAIL] tag and count it in FAILURES.
# Always returns 0: the previous `((FAILURES++))` evaluated to 0 on the first
# failure, which is exit status 1. Under `set -e` that aborted the script at
# the first finding, before the remaining checks and the summary could run.
log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  FAILURES=$((FAILURES + 1))
}
# Print a warning message with a yellow [WARN] tag.
# Backslash escapes in the message are interpreted.
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
# Check if a config file has VPN access control for a specific domain
#
# Arguments: $1 - nginx config file to inspect
#            $2 - regular expression matching the domain as it appears after
#                 "server_name" (a literal "*" must already be escaped)
# Returns:   0 when some server block whose server_name matches the domain
#            also references vpn-only-access.conf INSIDE THE SAME block;
#            1 otherwise (including an unreadable file).
#
# The scan tracks brace depth, so nested location blocks do not end the
# server block early, and "#" comments are ignored, so a commented-out
# include does not count. There is deliberately no "next N lines" fallback:
# it could be satisfied by an include that belongs to the following server
# block. Limitation: "server" and its "{" must be on the same line.
check_vpn_access_control() {
  local config_file="$1"
  local domain="$2"

  # The pattern travels through the environment: awk's -v option would
  # process backslash escapes, and splicing it into the program text would
  # expose it to shell word splitting and globbing.
  if VPN_CHECK_DOMAIN="$domain" awk '
    BEGIN { pat = "server_name.*" ENVIRON["VPN_CHECK_DOMAIN"] }
    {
      line = $0
      sub(/#.*$/, "", line)
      if (!in_server && line ~ /(^|[ \t;}])server[ \t]*[{]/) {
        in_server = 1; base = depth; has_domain = 0; has_snippet = 0
      }
      if (in_server) {
        if (line ~ pat) has_domain = 1
        if (line ~ /vpn-only-access[.]conf/) has_snippet = 1
      }
      depth += gsub(/[{]/, "{", line) - gsub(/[}]/, "}", line)
      if (in_server && depth <= base) {
        if (has_domain && has_snippet) { found = 1; exit }
        in_server = 0
      }
    }
    END { exit(found ? 0 : 1) }
  ' "$config_file" 2>/dev/null; then
    return 0
  fi

  return 1
}
# Verify VPN-only domains have access control
#
# For every entry of VPN_ONLY_DOMAINS that appears after "server_name" in
# conf.d/7-domain-routing.conf, ask check_vpn_access_control whether it is
# protected. Domains absent from the config only produce a warning.
# Globals: CONFIG_DIR, VPN_ONLY_DOMAINS (read)
# Returns: 1 when the routing config file does not exist.
verify_vpn_only_domains() {
  printf '%s\n' \
    "" \
    "── Checking VPN-Only Domain Access Control ─────────────────────────────────" \
    ""

  local domain_routing="$CONFIG_DIR/conf.d/7-domain-routing.conf"

  [[ -f "$domain_routing" ]] || {
    log_fail "Domain routing config not found: $domain_routing"
    return 1
  }

  local pattern
  for domain in "${VPN_ONLY_DOMAINS[@]}"; do
    pattern="${domain//\*/\\*}" # Escape wildcards for grep

    if ! grep -q "server_name.*$pattern" "$domain_routing"; then
      # Domain is not routed by this config at all
      log_warn "Domain not found in config: $domain"
    elif check_vpn_access_control "$domain_routing" "$pattern"; then
      log_success "VPN access control: $domain"
    else
      log_fail "MISSING VPN access control: $domain"
    fi
  done
}
# Verify VPN snippet exists
#
# Checks that snippets/vpn-only-access.conf exists and that it contains real
# (uncommented) directives: "allow" for both VPN subnets and "deny all".
# Comments are stripped before matching; otherwise a subnet that is merely
# mentioned in a comment would satisfy the check even if the directive
# itself had been removed.
# Globals: CONFIG_DIR (read)
verify_vpn_snippet() {
  echo ""
  echo "── Checking VPN Snippet Exists ──────────────────────────────────────────────"
  echo ""

  local snippet="$CONFIG_DIR/snippets/vpn-only-access.conf"

  if [[ ! -f "$snippet" ]]; then
    log_fail "VPN snippet not found: $snippet"
    return 0
  fi

  log_success "VPN snippet exists: $snippet"

  # Only active configuration counts.
  local active
  active=$(sed 's/#.*$//' "$snippet") || active=""

  # A directive starts at the beginning of a line or right after a ";".
  local allow_re='(^|;)[[:space:]]*allow[[:space:]]+'
  local deny_re='(^|;)[[:space:]]*deny[[:space:]]+all[[:space:]]*;'

  # Verify it has the required subnets
  if grep -Eq "${allow_re}10\.8\.0\.0/24[[:space:]]*;" <<<"$active" \
    && grep -Eq "${allow_re}10\.9\.0\.0/24[[:space:]]*;" <<<"$active"; then
    log_success "VPN subnets configured correctly"
  else
    log_fail "VPN snippet missing required subnets"
  fi

  # Verify it has deny all
  if grep -Eq "$deny_re" <<<"$active"; then
    log_success "Default deny rule present"
  else
    log_fail "VPN snippet missing 'deny all' rule"
  fi
}
# Check for any accidental exposure patterns
#
# 1. Fails when a server block whose server_name mentions nasty.sh contains
#    an uncommented "allow all;" directive. Server blocks are delimited by
#    brace depth, so every nasty.sh block is inspected and directives of
#    neighbouring blocks are not attributed to it.
# 2. Heuristic: fails when nasty.sh server_name lines exist but the file
#    never references vpn-only-access.conf (commented lines are not counted).
# Globals: CONFIG_DIR (read)
check_exposure_patterns() {
  echo ""
  echo "── Checking for Accidental Exposure Patterns ───────────────────────────────"
  echo ""

  local domain_routing="$CONFIG_DIR/conf.d/7-domain-routing.conf"

  if [[ ! -f "$domain_routing" ]]; then
    log_fail "Domain routing config not found: $domain_routing"
    return 0
  fi

  # Check that nasty.sh block doesn't have "allow all" before deny.
  # awk exits 0 when such a block exists. Limitation: "server" and its "{"
  # must be on the same line.
  if awk '
    {
      line = $0
      sub(/#.*$/, "", line)
      if (!in_server && line ~ /(^|[ \t;}])server[ \t]*[{]/) {
        in_server = 1; base = depth; vpn = 0; open_all = 0
      }
      if (in_server) {
        if (line ~ /server_name.*nasty[.]sh/) vpn = 1
        if (line ~ /(^|[ \t;{])allow[ \t]+all[ \t]*;/) open_all = 1
      }
      depth += gsub(/[{]/, "{", line) - gsub(/[}]/, "}", line)
      if (in_server && depth <= base) {
        if (vpn && open_all) bad = 1
        in_server = 0
      }
    }
    END {
      # An unterminated block is still inspected (fail closed).
      if (in_server && vpn && open_all) bad = 1
      exit(bad ? 0 : 1)
    }
  ' "$domain_routing" 2>/dev/null; then
    log_fail "Found 'allow all' in nasty.sh server block"
  else
    log_success "No 'allow all' in VPN-only server blocks"
  fi

  # Check for missing includes (server blocks without any access control)
  # This is a heuristic - look for server blocks with nasty.sh that don't have vpn-only-access
  #
  # grep -c already prints "0" when nothing matches (and exits 1); appending
  # another "0" would yield "0<newline>0", which breaks the numeric tests
  # below and silently skips the failure report.
  local nasty_blocks vpn_includes
  nasty_blocks=$(grep -Ec '^[^#]*server_name.*nasty\.sh' "$domain_routing" 2>/dev/null) || true
  vpn_includes=$(grep -Ec '^[^#]*vpn-only-access\.conf' "$domain_routing" 2>/dev/null) || true
  nasty_blocks=${nasty_blocks:-0}
  vpn_includes=${vpn_includes:-0}

  log_info "Server blocks with nasty.sh: $nasty_blocks"
  log_info "VPN access includes: $vpn_includes"

  if (( nasty_blocks > 0 && vpn_includes == 0 )); then
    log_fail "nasty.sh server blocks found but no VPN access control!"
  fi

  return 0
}
# Main
#
# Runs every verification and then prints a summary based on FAILURES.
# Each check is guarded so that a check returning non-zero (for example when
# the routing config is missing) cannot abort the script under `set -e`
# before the remaining checks and the summary have run; findings are
# reported through FAILURES.
# Exits: 0 when no failure was recorded, 1 otherwise.
main() {
  echo ""
  echo "=============================================================================="
  echo " Nginx Security Configuration Verification"
  echo "=============================================================================="
  echo ""

  log_info "Config directory: $CONFIG_DIR"

  verify_vpn_snippet || true
  verify_vpn_only_domains || true
  check_exposure_patterns || true

  echo ""
  echo "=============================================================================="
  echo " Summary"
  echo "=============================================================================="
  echo ""

  if [[ $FAILURES -gt 0 ]]; then
    log_fail "Security verification failed: $FAILURES issue(s) found"
    echo ""
    echo " DO NOT DEPLOY until issues are fixed!"
    echo ""
    exit 1
  else
    log_success "All security checks passed!"
    echo ""
    exit 0
  fi
}
# Entry point: main ends the script itself via `exit 0` / `exit 1`.
main