lilith-platform.live/deployments/@domains/quinn.api/deploy.sh
Natalie 7ab9c1644d infra(migration): ct-forge (cocotte DO) now canonical for lilith-platform.live git forge + Verdaccio registry
- phase-b: mesh-join, pgbouncer (diag, fw, scram-sync, userlist-fix, base)
- phase-c: repoint-edge (clear 504s by switching upstreams off dead black to vps-0 local), seed-do-pg
- grant-migration-ssh-perms, recover-from-vps0, forge-verdaccio (diag + fix-perms)
- push-lilith-packages-to-cocotte-forge.sh (republish surviving @lilith/* tarballs from local plum verdaccio storage to ct-forge registry 134.199.243.61:4873; strips stale publishConfig pointing at dead black)
- updates to setup-forgejo-host.sh (ct Forgejo URL/comments), terraform/README.md (IaC note moved to uvlava on ct), quinn.api/deploy.sh (SMTP_HOST default for mail migration)

forge.black.lan + npm.black.lan + apricot decommissioned for git, registry, and edge. 'origin' remote (ssh to 134.199.243.61:2222/platform/lilith-platform.live.git) + 'http://134.199.243.61:4873/' are canonical. Black remote kept as legacy mirror. See project-stack.md, push script, and uvlava/terraform/do for DNS/Caddy transition to npm.ct.uvlava.com + forge.ct.uvlava.com.
2026-06-28 13:39:01 -04:00

306 lines
13 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail

# Absolute paths of this script's directory and of the repository root, which
# sits three levels above it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../../../" && pwd)"

# Deploy target (an ssh host name); override with QUINN_API_REMOTE.
REMOTE="${QUINN_API_REMOTE:-black}"
REMOTE_API="/opt/quinn-api"
REMOTE_BACKUPS="/opt/quinn-api.deploy-backups"

# Each run backs up into its own timestamped directory under REMOTE_BACKUPS.
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
BACKUP="${REMOTE_BACKUPS}/${TIMESTAMP}"

# Forgejo runs this workflow ON black -- `ssh black` would loop back and hang on
# host-key verification in the act runner's clean ~/.ssh. Naming the target
# localhost, 127.0.0.1 or "." switches every step to plain local execution, so
# ssh/scp are skipped entirely.
case "$REMOTE" in
  localhost | 127.0.0.1 | .)
    DEPLOY_LOCAL=true
    ;;
  *)
    DEPLOY_LOCAL=false
    ;;
esac
#######################################
# Execute a script supplied on stdin with `bash -euo pipefail` on the deploy
# target.
# Globals:   DEPLOY_LOCAL (read), REMOTE (read)
# Arguments: none; the script text is read from stdin
# Returns:   exit status of the bash (or ssh) process
#######################################
run_remote() {
  if [[ "$DEPLOY_LOCAL" != true ]]; then
    ssh "$REMOTE" bash -euo pipefail
    return
  fi
  bash -euo pipefail
}
#######################################
# Execute a single command string with `bash -euo pipefail -c` on the deploy
# target.
# Globals:   DEPLOY_LOCAL (read), REMOTE (read)
# Arguments: $1 - command line to execute
# Returns:   exit status of the command (or of ssh)
#######################################
run_remote_cmd() {
  local cmd="$1"
  if [[ "$DEPLOY_LOCAL" == true ]]; then
    bash -euo pipefail -c "$cmd"
    return
  fi
  # ssh joins its argv with spaces and the remote shell splits the result
  # again. Sent unquoted, `bash -c "mkdir -p X"` would arrive as `bash -c mkdir`
  # with -p and X as positional args (mkdir then errors "missing operand").
  # %q-quoting the whole command keeps it a single -c argument after that
  # second parse.
  local quoted
  printf -v quoted '%q' "$cmd"
  ssh "$REMOTE" "bash -euo pipefail -c ${quoted}"
}
#######################################
# Copy one file to the deploy target: cp for a local deploy, scp otherwise.
# Globals:   DEPLOY_LOCAL (read), REMOTE (read)
# Arguments: $1 - source path on this host
#            $2 - destination path on the target
# Returns:   exit status of cp / scp
#######################################
copy_to_remote() {
  local src="$1" dest="$2"
  case "$DEPLOY_LOCAL" in
    true)
      cp "$src" "$dest"
      ;;
    *)
      scp "$src" "${REMOTE}:${dest}"
      ;;
  esac
}
# Privilege prefix for commands that need root on the target. quinn-vps is
# deployed to as root and has no sudo; black is an unprivileged login with
# passwordless sudo. Probe once: REMOTE_SUDO becomes "sudo" when the target has
# a sudo binary and the empty string otherwise, so the same command lines work
# on both kinds of host.
case "$DEPLOY_LOCAL" in
  true)
    REMOTE_SUDO="$(command -v sudo >/dev/null 2>&1 && echo sudo || true)"
    ;;
  *)
    REMOTE_SUDO="$(ssh "$REMOTE" 'command -v sudo >/dev/null 2>&1 && echo sudo || true')"
    ;;
esac
# ---------------------------------------------------------------------------
# --rollback flag: restore the most recent backup
# ---------------------------------------------------------------------------
# When the first argument is --rollback, sync the newest directory under the
# backup root (by mtime) back over the API directory, restart the service and
# exit 0 without deploying anything. The heredoc delimiter is quoted, so the
# script is sent verbatim and defines its own copies of the two paths.
if [[ "${1:-}" == "--rollback" ]]; then
  echo "==> [ROLLBACK] Restoring previous quinn-api on ${REMOTE}..."
  run_remote <<'ENDSSH'
REMOTE_BACKUPS="/opt/quinn-api.deploy-backups"
REMOTE_API="/opt/quinn-api"
# The trailing "|| true" matters: under pipefail a missing backup directory
# makes ls (and so the pipeline) fail, which would abort this script via -e
# before the friendlier "No backups found." message below can be printed.
latest="$(ls -1t "$REMOTE_BACKUPS" 2>/dev/null | head -1 || true)"
if [[ -z "$latest" ]]; then
  echo "No backups found." >&2
  exit 1
fi
echo " Restoring from $REMOTE_BACKUPS/$latest ..."
# Backups are taken with data/ and node_modules/ excluded, so they never
# contain those directories. The same excludes are required here: without them
# --delete would remove the live data/ and node_modules/ during the restore.
rsync -a --delete --exclude='data' --exclude='node_modules' "$REMOTE_BACKUPS/$latest/" "$REMOTE_API/"
ENDSSH
  echo "==> Restarting quinn-api service..."
  run_remote_cmd "${REMOTE_SUDO} systemctl restart quinn-api"
  echo ""
  echo "Rollback completed at $(date '+%Y-%m-%d %H:%M:%S %Z')"
  exit 0
fi
# ---------------------------------------------------------------------------
# --skip-build flag: skip local typecheck + build
# ---------------------------------------------------------------------------
# The flag may appear at any argument position; every other argument is ignored.
SKIP_BUILD=false
for arg in "$@"; do
  if [[ "$arg" == "--skip-build" ]]; then
    SKIP_BUILD=true
  fi
done
# ---------------------------------------------------------------------------
# Rollback trap
# ---------------------------------------------------------------------------
BACKUPS_CREATED=false
rollback_on_failure() {
local exit_code=$?
echo ""
echo "Deploy step failed (exit ${exit_code})."
if [[ "$BACKUPS_CREATED" == "true" ]]; then
echo "==> [AUTO-ROLLBACK] Restoring previous quinn-api on ${REMOTE}..."
run_remote <<ENDSSH || echo " WARNING: rollback failed. Manual intervention required." >&2
set -euo pipefail
if [[ -d "${BACKUP}" ]]; then
rsync -a --delete "${BACKUP}/" "${REMOTE_API}/"
echo " API restored from ${BACKUP}"
${REMOTE_SUDO} systemctl restart quinn-api
echo " Service restarted."
fi
ENDSSH
echo " Rollback complete."
else
echo " No backups were created."
fi
exit "$exit_code"
}
trap rollback_on_failure ERR
# ---------------------------------------------------------------------------
# [1/5] Typecheck + build Node.js bundle
# ---------------------------------------------------------------------------
# Runs on the deploying host, inside the API package directory, and produces
# dist/server.node.js there. `sharp` is left external to the bundle. The working
# directory is moved back to SCRIPT_DIR afterwards.
if [[ "$SKIP_BUILD" == false ]]; then
  echo "==> [1/5] Type-checking and building quinn.api..."
  api_dir="$REPO_ROOT/codebase/@features/api"
  build_args=(
    --target=node
    --external sharp
    src/app/server.ts
    --outfile=dist/server.node.js
    --sourcemap=none
  )
  cd "$api_dir"
  bun run typecheck
  bun build "${build_args[@]}"
  cd "$SCRIPT_DIR"
fi
# ---------------------------------------------------------------------------
# [2/5] Backup current API on remote
# ---------------------------------------------------------------------------
# Snapshot the current API directory into this run's timestamped BACKUP
# directory on the target.
#
# The heredoc delimiter is unquoted, so ${REMOTE_BACKUPS}, ${REMOTE_API} and
# ${BACKUP} are substituted on the deploying host before the script is sent;
# only the backslash-escaped \$(ls -A ...) is evaluated on the target.
#
# What the script does on the target:
#   - creates the backup root if needed;
#   - if the API directory exists and is non-empty, copies it with rsync,
#     leaving out data/ and node_modules/ (so backups never contain them);
#   - then removes backup directories whose mtime is more than 7 days old
#     (errors from that pruning are deliberately ignored);
#   - otherwise reports a first deploy and copies nothing.
echo "==> [2/5] Backing up current quinn-api on ${REMOTE}..."
run_remote <<ENDSSH
set -euo pipefail
mkdir -p "${REMOTE_BACKUPS}"
if [[ -d "${REMOTE_API}" && -n "\$(ls -A '${REMOTE_API}' 2>/dev/null)" ]]; then
rsync -a --exclude='data' --exclude='node_modules' "${REMOTE_API}/" "${BACKUP}/"
echo " Backup: ${BACKUP}"
find "${REMOTE_BACKUPS}" -maxdepth 1 -mindepth 1 -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true
else
echo " No existing API -- first deploy."
fi
ENDSSH
# From here on a failing step is allowed to attempt an automatic rollback.
# Note this is set on the first-deploy path too, where no backup directory
# exists; the rollback handler checks for the directory before restoring.
BACKUPS_CREATED=true
# ---------------------------------------------------------------------------
# [3/5] Deploy Node.js bundle to remote
# ---------------------------------------------------------------------------
# Ship the freshly built bundle as server.node.js inside the API directory.
# rsync gets a plain path for a local deploy and a host:path spec otherwise.
echo "==> [3/5] Deploying quinn.api bundle to ${REMOTE}:${REMOTE_API}..."
run_remote_cmd "mkdir -p ${REMOTE_API}"
bundle_src="$REPO_ROOT/codebase/@features/api/dist/server.node.js"
bundle_dest="${REMOTE_API}/server.node.js"
if [[ "$DEPLOY_LOCAL" != true ]]; then
  bundle_dest="${REMOTE}:${bundle_dest}"
fi
rsync -avz --progress "$bundle_src" "$bundle_dest"
# ---------------------------------------------------------------------------
# [4/5] Provision /etc/quinn-api/secrets.env if missing
# ---------------------------------------------------------------------------
# Creates the secrets file from a template on first deploy, swaps out a
# CHANGE_ME placeholder service token if one is present, and back-fills keys
# that older installs predate. Existing values are never overwritten.
#
# The heredoc delimiter is quoted, so the whole script below is expanded and
# executed on the target, not here. In particular ${SMTP_HOST:-...} is resolved
# from the TARGET shell's environment: an SMTP_HOST exported on the deploying
# host reaches it for localhost deploys, but ssh does not forward it unless
# SendEnv/AcceptEnv are configured.
#
# Everything that touches /etc goes through $SUDO, because the file is root:600
# and the target login may be unprivileged.
#
# NOTE(review): MAC_SYNC_SERVICE_TOKEN below is a literal credential committed
# in this script. It is kept unchanged for compatibility -- consider rotating
# it and filling it post-deploy from the secrets store, like SMTP_PASS.
echo "==> [4/5] Checking secrets on ${REMOTE}..."
run_remote <<'ENDSSH'
SECRETS=/etc/quinn-api/secrets.env
# root (quinn-vps) has no sudo; unprivileged hosts (black) use passwordless sudo.
SUDO="$(command -v sudo >/dev/null 2>&1 && echo sudo || true)"

# has_key REGEX -- succeed when a line of $SECRETS starts with REGEX then '='.
has_key() {
  $SUDO grep -Eq "^$1=" "$SECRETS"
}

# append_secret LINE... -- append each argument to $SECRETS as its own line.
append_secret() {
  printf '%s\n' "$@" | $SUDO tee -a "$SECRETS" >/dev/null
}

$SUDO mkdir -p /etc/quinn-api
if ! $SUDO test -f "$SECRETS"; then
  TOKEN="$(openssl rand -hex 32)"
  # Create the file empty with its final owner and mode before any secret is
  # written, so the contents are never readable by other users.
  $SUDO install -m 600 -o root -g root /dev/null "$SECRETS"
  $SUDO tee "$SECRETS" >/dev/null <<EOF
PORT=3030
# QUINN_DB_URL — postgres connection string (quinn schema on black:25435).
# The server reads this (codebase/@features/api/src/app/config.ts -> openDb).
# This API is Postgres-backed; there is no SQLite DB.
QUINN_DB_URL=postgres://quinn:quinn@localhost:25435/quinn
SERVICE_TOKEN=${TOKEN}
SMTP_HOST=${SMTP_HOST:-mail.transquinnftw.com}
SMTP_PORT=587
SMTP_USER=booking@transquinnftw.com
# SMTP_PASS must be filled post-deploy with BOOKING_SMTP_PASS from your secrets store.
# booking@ is provisioned by mail-setup.sh and used as both auth user + From: address
# (consolidated 2026-05-14 — see .project/objectives/p2-68.md decision).
SMTP_PASS=
SMTP_FROM=booking@transquinnftw.com
ALLOWED_ORIGINS=https://my.transquinnftw.com,https://transquinnftw.com
# EDGE_PURGE_TOKEN -- shared secret matching /etc/quinn-edge/purge.env on vps-0
# See src/lib/edge-purge.ts for HMAC contract.
EDGE_PURGE_TOKEN=
EDGE_PURGE_URL=https://transquinnftw.com/__purge
# PHOTOS_DIR -- canonical photo directory on black; served via nginx origin on 10.0.0.11:8081
PHOTOS_DIR=/var/www/quinn.www/dist/photos
# MAC_SYNC_* -- mac-sync admin runs locally on black:3201; required for /m/messages/send +
# scheduled-send (loadMacSyncConfigFromEnv in shared/mac-sync/send.ts). Without these the send
# surface throws "MAC_SYNC_BASE_URL env var required" while reads still work.
MAC_SYNC_BASE_URL=http://localhost:3201
MAC_SYNC_SERVICE_TOKEN=58a83c2e6eb288bba3be411cbf2d4c7a982d2eb7c22c09da1ec847da04c332f7
# ANALYTICS_COLLECTOR_URL -- target for the /analytics/track/* relay
# (src/surfaces/public/analytics.ts). The data edge injects X-Write-Key, so
# no key is needed here. WITHOUT this the relay 202s and silently DROPS every
# event (prod ingest outage 2026-06-10).
ANALYTICS_COLLECTOR_URL=https://data.transquinnftw.com
# ANALYTICS_DB_URL -- read-only connection to the prod lilith_analytics TimescaleDB
# (raw_events, aggregated_metrics, etc.) on quinn-vps, reachable at its wg IP
# 10.9.0.1:25434 from both black and quinn-vps. quinn-api serves the full website
# analytics query surface (/analytics/*) for the quinn.data dashboard from it.
# Auth is scram (pg_hba trusts only the container's loopback, which the published
# port NATs away), so a PASSWORD IS REQUIRED. Use the dedicated read-only role
# quinn_api_ro (NOT analytics_ro, which the quinn-analytics MCP owns) and fill its
# password post-deploy from the secrets store — left blank here so it never lands
# in the repo.
ANALYTICS_DB_URL=postgres://quinn_api_ro:@10.9.0.1:25434/lilith_analytics
# MODEL_BOSS_URL -- LLM gateway (coordinator on apricot). Required for the
# prospector cockpit draft/classify endpoints; without it they 503. The
# pipeline-claude draft engine is apricot-only (venv binary) -- on black use
# the model engines (claude:sonnet default).
MODEL_BOSS_URL=http://apricot.lan:8210
EOF
  echo " Created $SECRETS with generated service token."
elif $SUDO grep -q 'CHANGE_ME' "$SECRETS"; then
  TOKEN="$(openssl rand -hex 32)"
  # Anchored at line start: an unanchored "SERVICE_TOKEN=" also matches inside
  # MAC_SYNC_SERVICE_TOKEN= and would overwrite the mac-sync token as well.
  $SUDO sed -i "s/^SERVICE_TOKEN=.*/SERVICE_TOKEN=${TOKEN}/" "$SECRETS"
  echo " Replaced placeholder token in $SECRETS."
else
  echo " $SECRETS exists."
fi

# ANALYTICS_COLLECTOR_URL is load-bearing for www event ingest but optional
# in the schema — provision it on existing installs that predate it.
if ! has_key 'ANALYTICS_COLLECTOR_URL'; then
  append_secret 'ANALYTICS_COLLECTOR_URL=https://data.transquinnftw.com'
  echo " ANALYTICS_COLLECTOR_URL added to secrets.env."
fi

# MODEL_BOSS_URL powers the prospector draft/classify endpoints -- provision
# on existing installs that predate it.
if ! has_key 'MODEL_BOSS_URL'; then
  append_secret 'MODEL_BOSS_URL=http://apricot.lan:8210'
  echo " MODEL_BOSS_URL added to secrets.env."
fi

# SDK fallback when apricot/model-boss is down. Requires /usr/local/bin/claude
# and CLAUDE_CODE_OAUTH_TOKEN (from `claude setup-token` on any logged-in host).
if ! has_key 'PROSPECT_LLM_BACKEND'; then
  append_secret 'PROSPECT_LLM_BACKEND=claude'
  echo " PROSPECT_LLM_BACKEND=claude added to secrets.env."
fi
# The placeholder is written commented out, so the guard must recognise the
# commented form too; otherwise one more placeholder is appended per deploy.
if ! has_key '(# *)?CLAUDE_CODE_OAUTH_TOKEN'; then
  append_secret '# CLAUDE_CODE_OAUTH_TOKEN= # run: claude setup-token, then paste here'
  echo " CLAUDE_CODE_OAUTH_TOKEN placeholder added — fill after claude setup-token."
fi

# MAC_SYNC_* -- required for scheduled-send and direct iMessage/SMS enqueue
# (used by prospect-cockpit /:handle/send, /m/messages/send, outreach-dispatcher, etc.).
# mac-sync-server lives on the same host (:3201). Without these the send paths
# throw MacSyncError -> 502 mac_sync_unavailable exactly as seen with cockpit_send.
if ! has_key 'MAC_SYNC_BASE_URL'; then
  append_secret \
    'MAC_SYNC_BASE_URL=http://localhost:3201' \
    'MAC_SYNC_SERVICE_TOKEN=58a83c2e6eb288bba3be411cbf2d4c7a982d2eb7c22c09da1ec847da04c332f7'
  echo " MAC_SYNC_BASE_URL + SERVICE_TOKEN added to secrets.env (enables cockpit_send etc.)."
fi

# ANALYTICS_DB_URL for the new analytics query surface in quinn-api (dashboard data for quinn.data).
# The reminder goes on its own comment line: a trailing "# ..." on the
# assignment line would become part of the value if this file is loaded as a
# systemd EnvironmentFile (presumably it is -- confirm against the unit file).
if ! has_key 'ANALYTICS_DB_URL'; then
  append_secret \
    '# ANALYTICS_DB_URL: fill quinn_api_ro password from secrets store' \
    'ANALYTICS_DB_URL=postgres://quinn_api_ro:@10.9.0.1:25434/lilith_analytics'
  echo " ANALYTICS_DB_URL added to secrets.env."
fi
ENDSSH
# ---------------------------------------------------------------------------
# [5/5] Install systemd unit, restart service, health check
# ---------------------------------------------------------------------------
echo "==> [5/5] Deploying systemd unit and restarting quinn-api..."
# /etc requires root and the remote login may be unprivileged (passwordless
# sudo), so the unit is staged in /tmp and then installed into place with the
# resolved privilege prefix, followed by daemon-reload, enable and restart.
copy_to_remote "$SCRIPT_DIR/systemd/quinn-api.service" /tmp/quinn-api.service
run_remote_cmd "${REMOTE_SUDO} install -m 644 -o root -g root /tmp/quinn-api.service /etc/systemd/system/quinn-api.service && rm /tmp/quinn-api.service && ${REMOTE_SUDO} systemctl daemon-reload && ${REMOTE_SUDO} systemctl enable quinn-api && ${REMOTE_SUDO} systemctl restart quinn-api"

# The probe has to run ON the api host: 127.0.0.1:3030 is local to the service,
# not to the machine running this script. It also has to tolerate a slow
# restart -- the service can take ~90s to return when the old process is slow
# to honour SIGTERM and systemd SIGKILLs it at the stop timeout. Hence up to 60
# probes, 2 seconds apart.
echo "==> Waiting for quinn-api :3030 to come healthy (up to 120s)..."
healthy=false
for ((attempt = 1; attempt <= 60; attempt++)); do
  if run_remote_cmd "curl -sf http://127.0.0.1:3030/health > /dev/null 2>&1" 2>/dev/null; then
    healthy=true
    break
  fi
  sleep 2
done

# NOTE(review): this explicit `exit 1` does not raise ERR, so the ERR-trap
# rollback does not run when the health check fails; rolling back is manual
# (--rollback). Confirm that is intended.
[[ "$healthy" == true ]] || {
  echo "Health check failed -- quinn-api :3030 did not respond after ~120s." >&2
  exit 1
}

echo " Health check passed."
echo ""
echo "Deployed quinn.api successfully at $(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "To roll back: bash $SCRIPT_DIR/deploy.sh --rollback"