#!/usr/bin/env bash
# =============================================================================
# quinn-data-sanity — Out-of-band liveness probe for data.transquinnftw.com
# =============================================================================
# Runs from black.lan on a 5-minute systemd timer (quinn-data-sanity.timer)
# to catch outages *between* deploys. The forgejo smoke step only runs on
# push to main; without this timer, a backend crash or config drift can go
# unnoticed until someone manually hits the dashboard.
#
# Exit codes:
#   0 — all checks passed
#   1 — at least one check failed (see stderr for which), or smoke-check.sh
#       is missing / not executable
#
# Observability:
#   All output (stdout + stderr) goes to the systemd journal via
#   StandardOutput=journal in the service unit. Inspect with:
#     ssh black "sudo journalctl -u quinn-data-sanity.service -n 50"
#   Last run timestamp + exit code:
#     ssh black "sudo systemctl status quinn-data-sanity.service"
#
# Extension point:
#   To get paged on failure, add an OnFailure= line to
#   quinn-data-sanity.service pointing at a user-defined notifier unit
#   (e.g., quinn-data-sanity-notify.service that curls an ntfy.sh topic).
#   This script stays pure — notification concerns are a systemd concern.
# =============================================================================

# Strict-ish mode. `-e` is deliberately omitted: a failing check must not
# abort the script — every check runs and failures are tallied for the
# summary printed at the end.
set -uo pipefail

# Locations; each can be overridden from the environment
# (QUINN_DATA_REPO_ROOT, QUINN_DATA_BASE_URL).
readonly REPO_ROOT="${QUINN_DATA_REPO_ROOT:-/var/home/lilith/Code/@projects/@lilith/lilith-platform.live}"
readonly SMOKE_CHECK="${REPO_ROOT}/deployments/ci/smoke-check.sh"
readonly BASE_URL="${QUINN_DATA_BASE_URL:-https://data.transquinnftw.com}"

# Bail out early with a clear message if the helper script is unavailable;
# otherwise every check below would fail for the same uninformative reason.
if [[ ! -x "$SMOKE_CHECK" ]]; then
  echo "FATAL: smoke-check.sh not found or not executable: $SMOKE_CHECK" >&2
  exit 1
fi

# UA is picked to NOT match quinn-maps.conf's $is_scraper regex. If it ever
# does, the / and /provider/ checks will falsely return 403 instead of 302.
# Exported so child processes inherit it (presumably read by smoke-check.sh —
# confirm against that script).
export SANITY_UA="Mozilla/5.0 (quinn-data-sanity probe)"

# Captured once and reused in the final summary line, so the start and end
# messages of one run carry the same stamp.
timestamp="$(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "[${timestamp}] quinn-data-sanity probe starting against ${BASE_URL}"

# Tallies updated by run_check: overall failure flag and the failed labels.
failed=0
failures=()

#######################################
# Run one smoke check and record the outcome.
# Globals:
#   SMOKE_CHECK (read)    - path of the smoke-check script, executed via bash
#   failures    (written) - label is appended when the check fails
#   failed      (written) - set to 1 when the check fails
# Arguments:
#   $1 - URL passed to the smoke-check script
#   $2 - expected value passed to the smoke-check script (the callers in
#        this file pass HTTP status codes)
#   $3 - human-readable label used in the log lines and the failure list
# Outputs:
#   success line on stdout, failure line on stderr
# Returns:
#   0 in both cases; the failure is recorded in the globals rather than
#   propagated, so later checks still run.
#######################################
run_check() {
  local url="$1" expected="$2" label="$3"

  if bash "$SMOKE_CHECK" "$url" "$expected"; then
    echo " ✔ ${label}"
  else
    echo " ✖ ${label} FAILED" >&2
    failures+=("$label")
    failed=1
  fi
}

# Table of checks, three fields per row: URL path (appended to BASE_URL),
# expected HTTP status, and label. The summary's "N of M" total is derived
# from this table, so adding or removing a row keeps the message accurate.
checks=(
  # 1. Liveness: /healthz must be 200 from the BFF. No auth, no scraper guard.
  #    A 502/504 means the BFF on :4005 is down. A 404 means the nginx
  #    /healthz location was removed. A 403 means the scraper guard crept
  #    back to server scope.
  "/healthz" 200 "liveness: /healthz = 200"

  # 2. Dashboard auth wiring: / must 302-redirect unauthenticated visitors to
  #    admin login. A 200 means auth is bypassed; a 502 means admin
  #    /auth/verify upstream is dead; a 403 means our UA accidentally
  #    triggered $is_scraper.
  "/" 302 "dashboard: / → admin login"

  # 3. Provider dashboard auth wiring — same contract.
  "/provider/" 302 "dashboard: /provider/ → admin login"
)
fields_per_check=3
total_checks=$(( ${#checks[@]} / fields_per_check ))

# Run every check in table order; run_check records failures in the
# `failed` flag and the `failures` array instead of aborting.
for (( i = 0; i < ${#checks[@]}; i += fields_per_check )); do
  run_check "${BASE_URL}${checks[i]}" "${checks[i+1]}" "${checks[i+2]}"
done

if (( failed == 0 )); then
  echo "[${timestamp}] All quinn.data sanity checks passed."
  exit 0
fi

# Failure summary goes to stderr: a blank separator, the count, then one
# line per failed check label.
{
  echo ""
  echo "[${timestamp}] quinn-data-sanity FAILED (${#failures[@]} of ${total_checks} checks)."
  for f in "${failures[@]}"; do
    echo " - $f"
  done
} >&2
exit 1