infra(uvlava): add DNS zone for uvlava.com infranet (ct.uvlava.com namespace)

Account-namespaced infranet DNS, DO-managed:
- uvlava.com zone + forge.ct / npm.ct / backend.ct / db.ct / apex records
- forge.ct + npm.ct -> cocotte-forge (134.199.243.61); become HTTPS endpoints
  once Caddy/LE is up, replacing the interim bare-IP plaintext npm registry
- outputs: uvlava_nameservers (for joker.com NS delegation) + ct_infra_hosts

Inert until uvlava.com NS is delegated to DO at the registrar.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Natalie 2026-06-28 08:21:26 -04:00
parent 284510b9ac
commit 66df0ecb96
4 changed files with 342 additions and 1 deletions

View file

@ -53,7 +53,7 @@ region call and PG sizing are settled. Apply gates on a verified account (done)
and registered SSH keys.
```sh
cd infrastructure/terraform/do
cd ~/Code/@projects/uvlava/terraform/do # IaC moved out of the v2 tree into the uvlava infranet repo
export TF_VAR_do_token="$(cat ~/.vault/do-pat-ct.token)"
export TF_VAR_spaces_access_id="…" # API → Spaces Keys
export TF_VAR_spaces_secret_key="…"

76
terraform/do/dns.tf Normal file
View file

@ -0,0 +1,76 @@
###############################################################################
# uvlava.com shared infranet DNS zone (DO-managed).
#
# Account-namespaced layout: each DO account's infra lives under its own
# <account>.uvlava.com namespace, so the single zone can host both:
#   ct.uvlava.com -> this account (TransQuinnFTW / ct:prod): store + infra tier
#   mc.uvlava.com -> magic-civilization account (separate PAT); wired later
#
# DELEGATION: these records are INERT until the registrar (joker.com) delegates
# uvlava.com NS to DigitalOcean (ns1/ns2/ns3.digitalocean.com; see the
# `uvlava_nameservers` output). Until then nothing here resolves publicly.
#
# TLS: forge.ct / npm.ct are A records to the cocotte-forge droplet, which runs
# a reverse proxy (Caddy/LE) terminating HTTPS in front of Forgejo (:3000) and
# Verdaccio (:4873). The HTTPS npm endpoint (https://npm.ct.uvlava.com) replaces
# the interim bare-IP plaintext registry (http://134.199.243.61:4873) and closes
# the plaintext-over-public-internet supply-chain risk.
###############################################################################
# Address that the forge.ct / npm.ct / apex A records point at.
# The droplet is not managed in this state, so its IP is passed in as a variable.
variable "forge_public_ip" {
  type        = string
  default     = "134.199.243.61"
  description = "Public IP of the cocotte-forge droplet (Forgejo :3000 + Verdaccio :4873). Created out-of-band; not a TF resource in this state."
}
# The uvlava.com zone itself; every record below attaches to it via
# digitalocean_domain.uvlava.name.
resource "digitalocean_domain" "uvlava" {
  name = "uvlava.com"
}
# --- ct.uvlava.com : this account's (ct:prod) infra namespace ----------------
# forge.ct.uvlava.com -> forge droplet. Serves the Forgejo git + web UI, which
# is reverse-proxied over HTTPS to :3000 on that droplet.
resource "digitalocean_record" "ct_forge" {
  name   = "forge.ct"
  type   = "A"
  value  = var.forge_public_ip
  ttl    = 300
  domain = digitalocean_domain.uvlava.name
}
# npm.ct.uvlava.com -> forge droplet. Fronts the Verdaccio @lilith npm registry,
# which is reverse-proxied over HTTPS to :4873.
resource "digitalocean_record" "ct_npm" {
  name   = "npm.ct"
  type   = "A"
  value  = var.forge_public_ip
  ttl    = 300
  domain = digitalocean_domain.uvlava.name
}
# backend.ct.uvlava.com -> the backend droplet's reserved public IP
# (workers / MCP / pgBouncer bridge).
resource "digitalocean_record" "ct_backend" {
  name   = "backend.ct"
  type   = "A"
  value  = digitalocean_reserved_ip.backend.ip_address
  ttl    = 300
  domain = digitalocean_domain.uvlava.name
}
# db.ct.uvlava.com: host alias for the managed-PG bridge. The PG cluster itself
# stays VPC-private; this only names the pgBouncer bridge that fronts it on the
# backend droplet, so it is a CNAME to backend.ct.
resource "digitalocean_record" "ct_db" {
  domain = digitalocean_domain.uvlava.name
  type   = "CNAME"
  name   = "db.ct"
  # Derive the target from the backend record instead of repeating the hostname,
  # so renaming backend.ct (or the zone) cannot leave this alias dangling.
  # The trailing dot marks the target as fully qualified.
  value = "${digitalocean_record.ct_backend.fqdn}."
  ttl   = 300
}
# Zone apex ("@"). Pointed at the forge droplet for now so that bare uvlava.com
# resolves to a real host.
resource "digitalocean_record" "apex" {
  name   = "@"
  type   = "A"
  value  = var.forge_public_ip
  ttl    = 300
  domain = digitalocean_domain.uvlava.name
}

View file

@ -51,3 +51,23 @@ output "gpu_droplet_ip" {
description = "GPU droplet private IP (null unless gpu_enabled and account allowlisted)."
value = var.gpu_enabled ? digitalocean_droplet.gpu[0].ipv4_address_private : null
}
###############################################################################
# uvlava.com infranet DNS (dns.tf)
###############################################################################
# Static list of DigitalOcean's nameservers, surfaced as an output so the
# registrar-side NS delegation step has something to copy from.
output "uvlava_nameservers" {
  value = [
    "ns1.digitalocean.com",
    "ns2.digitalocean.com",
    "ns3.digitalocean.com",
  ]
  description = "Set these as the authoritative NS for uvlava.com at the registrar (joker.com) to delegate the zone to DigitalOcean."
}
# Map of logical infra role -> FQDN. Values are read from the DNS resources
# rather than typed out again, so this output cannot disagree with the records
# that actually exist in the zone.
output "ct_infra_hosts" {
  description = "ct.uvlava.com infra FQDNs (resolve once NS is delegated; HTTPS once Caddy/LE is up on the forge droplet)."
  value = {
    forge   = digitalocean_record.ct_forge.fqdn
    npm     = digitalocean_record.ct_npm.fqdn
    backend = digitalocean_record.ct_backend.fqdn
    db      = digitalocean_record.ct_db.fqdn
    # The apex host is the zone name itself.
    apex = digitalocean_domain.uvlava.name
  }
}

245
terraform/do/redroid.tf Normal file
View file

@ -0,0 +1,245 @@
# Redroid (containerized Android) for Mr. Number lookup tool.
# See .project/handoffs/20260627_mr-number-redroid-do.md
# redroid because DO has no nested virt for SDK emulator.
# Requires host support for binder/ashmem (modprobe in user_data; may need DKMS on DO kernel).
# Volume for /data to persist Google sign-in + paid Mr. Number app.
# 20 GiB ext4 block volume intended to hold redroid's /data, so the Android
# state outlives the droplet.
resource "digitalocean_volume" "redroid_data" {
  name                    = "redroidmrnumberdata"
  description             = "Persistent /data for redroid (paid Mr. Number reports state)"
  region                  = var.region
  size                    = 20
  initial_filesystem_type = "ext4"
}
# Droplet that hosts the redroid (containerized Android) instance.
#
# user_data is a first-boot bash script that:
#   1. waits for DNS, then installs linux-modules-extra + docker (with retries);
#   2. loads binder/ashmem, falling back to a DKMS build, and refuses to start
#      the container if binder is still missing (writes /root/BINDER_STATUS);
#   3. starts the redroid container and resets adbd once Android has booted;
#   4. installs ws-scrcpy behind an nginx site gated by an SSO auth_request.
resource "digitalocean_droplet" "redroid" {
  name     = "lilith-store-redroid"
  image    = "ubuntu-22-04-x64"
  size     = "s-2vcpu-4gb"
  region   = var.region
  vpc_uuid = digitalocean_vpc.store.id
  ssh_keys = var.ssh_key_fingerprints
  tags     = concat(var.tags, ["redroid", "android", "mr-number"])

  user_data = <<-EOT
    #!/bin/bash
    set -euo pipefail
    # Wait for DNS/network before apt. The FIRST attempt died here: resolv.conf wasn't ready,
    # apt-get update failed, and `set -e` aborted before binder was ever installed. Retry until up.
    for n in $(seq 1 30); do
      if getent hosts archive.ubuntu.com >/dev/null 2>&1; then break; fi
      echo "waiting for DNS/network ($n)..."; sleep 5
    done
    apt_retry() { for n in $(seq 1 5); do apt-get "$@" && return 0; echo "apt retry $n"; sleep 10; done; return 1; }
    apt_retry update -y
    # linux-modules-extra-$(uname -r) PROVIDES binder_linux.ko + ashmem_linux.ko on Ubuntu.
    # The prior attempt OMITTED this and concluded "kernel can't load binder" -- it just was never installed.
    # PROVEN 2026-06-27 on DO kernel 5.15.0-171-generic: both modules load cleanly.
    apt_retry install -y "linux-modules-extra-$(uname -r)" docker.io
    # Load binder/ashmem -- critical. If absent after install, try the DKMS module build; if STILL
    # absent, this kernel genuinely can't do it -> write BINDER_FAILED and do NOT start a broken container.
    modprobe binder_linux devices=binder,hwbinder,vndbinder || true
    modprobe ashmem_linux || true   # ashmem optional; removed since 5.18, present on 22.04/5.15
    if ! lsmod | grep -q binder_linux; then
      echo "binder absent after linux-modules-extra; attempting DKMS build" | tee -a /var/log/redroid-bootstrap.log
      apt-get install -y dkms git build-essential "linux-headers-$(uname -r)" || true
      git clone --depth 1 https://github.com/remote-android/redroid-modules.git /opt/redroid-modules || true
      (cd /opt/redroid-modules && ./install.sh) || true
      modprobe binder_linux devices=binder,hwbinder,vndbinder || true
    fi
    if lsmod | grep -q binder_linux; then
      echo "BINDER_OK" > /root/BINDER_STATUS
    else
      echo "BINDER_FAILED" > /root/BINDER_STATUS
      echo "binder_linux could not be loaded on $(uname -r) -- redroid NOT started. See handoff STOP rule." | tee -a /var/log/redroid-bootstrap.log
      exit 1
    fi
    systemctl enable --now docker
    mkdir -p /data/redroid
    # Post-apply: mount the volume if not auto: mount /dev/disk/by-id/scsi-0DO_Volume_redroidmrnumberdata /data/redroid
    # adb on 5555 is reachable only from admin_ips (DO firewall, see digitalocean_firewall.redroid).
    # --restart unless-stopped: container (and adb) comes back after a droplet reboot.
    docker run -d --privileged --restart unless-stopped \
      --name redroid-mrnumber \
      -v /data/redroid:/data \
      -p 5555:5555 \
      redroid/redroid:11.0.0-latest \
      androidboot.redroid_gpu_mode=guest
    # Current platform-tools adb (Ubuntu's android-tools-adb handshakes fine, but on first TCP
    # connect during boot the device latches "offline"; restarting guest adbd after dev.bootcomplete
    # and reconnecting clears it). ws-scrcpy + plum both rely on this.
    curl -fsSL -o /opt/pt.zip https://dl.google.com/android/repository/platform-tools-latest-linux.zip && \
      (cd /opt && rm -rf platform-tools && (command -v unzip || apt_retry install -y unzip) && unzip -q pt.zip && ln -sf /opt/platform-tools/adb /usr/local/bin/adb) || true
    # Wait for Android boot, then reset adbd so TCP adb goes "device" (not "offline").
    for n in $(seq 1 40); do
      bc=$(/usr/local/bin/adb -s localhost:5555 shell getprop dev.bootcomplete 2>/dev/null | tr -d "\r" || true)
      /usr/local/bin/adb connect localhost:5555 >/dev/null 2>&1 || true
      [ "$bc" = "1" ] && break; sleep 10
    done
    docker exec redroid-mrnumber setprop ctl.restart adbd 2>/dev/null || true
    /usr/local/bin/adb disconnect localhost:5555 >/dev/null 2>&1 || true; /usr/local/bin/adb kill-server >/dev/null 2>&1 || true
    /usr/local/bin/adb connect localhost:5555 >/dev/null 2>&1 || true
    echo "redroid up. From plum (firewall-allowed): adb connect <ipv4>:5555"
    echo "Inside redroid: install GApps, sign in with paid Mr. Number account, install com.mrnumber.blocker (paid tier for reports)"
    # Web console setup (ws-scrcpy + nginx SSO gate)
    # Install node for ws-scrcpy
    curl -fsSL https://deb.nodesource.com/setup_18.x | bash -
    # Use the retry helper here too: a transient apt failure would otherwise abort the
    # script (set -e) after the container is already up, leaving the console half-installed.
    apt_retry install -y nodejs git nginx
    # ws-scrcpy
    cd /opt
    rm -rf ws-scrcpy
    git clone --depth 1 https://github.com/NetrisTV/ws-scrcpy.git
    cd ws-scrcpy
    npm install
    npm run dist
    # systemd for ws-scrcpy on localhost:8000
    # Quoted delimiter: the unit file is written verbatim, with no shell expansion.
    cat > /etc/systemd/system/ws-scrcpy.service << 'SERVICE'
    [Unit]
    Description=ws-scrcpy for Mr. Number web console
    After=network.target docker.service
    [Service]
    Type=simple
    User=root
    WorkingDirectory=/opt/ws-scrcpy/dist
    ExecStart=/usr/bin/node index.js --port 8000 --host 127.0.0.1 --adb-host 127.0.0.1 --adb-port 5555
    Restart=always
    RestartSec=10
    StandardOutput=append:/var/log/ws-scrcpy.log
    StandardError=append:/var/log/ws-scrcpy.log
    [Install]
    WantedBy=multi-user.target
    SERVICE
    systemctl daemon-reload
    systemctl enable --now ws-scrcpy
    # nginx site for /android-console/ with SSO
    # The delimiter MUST stay quoted. nginx variables ($host, $http_cookie, ...) look like
    # shell variables; with an unquoted delimiter any unescaped one is expanded by bash,
    # and under `set -u` an unset name aborts the whole script before the file is written.
    cat > /etc/nginx/sites-available/android-console << 'NGINX'
    server {
        listen 80;
        server_name _;
        location = /_sso_verify {
            internal;
            proxy_pass https://sso.transquinnftw.com/auth/validate;
            proxy_pass_request_body off;
            proxy_set_header Content-Length "";
            proxy_set_header Cookie $http_cookie;
            proxy_set_header X-Original-URI $request_uri;
            proxy_set_header X-Forwarded-Host $host;
        }
        location @sso_redirect {
            return 302 https://sso.transquinnftw.com/login?redirect=https://$host$request_uri;
        }
        location /android-console/ {
            auth_request /_sso_verify;
            error_page 401 = @sso_redirect;
            proxy_pass http://127.0.0.1:8000/;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_read_timeout 3600s;
            proxy_send_timeout 3600s;
        }
        location /android-console {
            return 301 /android-console/;
        }
    }
    NGINX
    ln -sf /etc/nginx/sites-available/android-console /etc/nginx/sites-enabled/
    # Drop the stock Ubuntu site: it is the default_server on :80, so while it is enabled
    # it (not the android-console site, whose server_name "_" matches no real Host) answers.
    rm -f /etc/nginx/sites-enabled/default
    nginx -t && nginx -s reload || true
    echo "ws-scrcpy and nginx console ready on :8000 /android-console/"
  EOT

  lifecycle {
    # user_data only runs at first boot; the box is live with the paid Mr.Number
    # app + Google sign-in + ws-scrcpy console already installed. Without this,
    # any edit to the user_data above forces a destroy+recreate that wipes it.
    # Mirrors the backend droplet. Re-provision deliberately, never via drift.
    ignore_changes = [user_data]
  }
}
# Attaches the redroid data volume to the redroid droplet.
resource "digitalocean_volume_attachment" "redroid_data" {
  volume_id  = digitalocean_volume.redroid_data.id
  droplet_id = digitalocean_droplet.redroid.id
}
# Cloud firewall for the redroid droplet.
#   inbound : SSH (22) and adb-over-TCP (5555), both restricted to var.admin_ips
#   outbound: all TCP and UDP ports to any IPv4/IPv6 destination
# NOTE(review): there is no inbound rule for port 80, where the droplet's
# user_data configures nginx to listen -- confirm the web console is meant to
# be unreachable through this firewall.
resource "digitalocean_firewall" "redroid" {
  name        = "lilith-store-redroid-fw"
  droplet_ids = [digitalocean_droplet.redroid.id]

  # One admin-only inbound TCP rule per listed port.
  dynamic "inbound_rule" {
    for_each = ["22", "5555"]
    content {
      protocol         = "tcp"
      port_range       = inbound_rule.value
      source_addresses = var.admin_ips
    }
  }

  # One allow-all outbound rule per listed protocol.
  dynamic "outbound_rule" {
    for_each = ["tcp", "udp"]
    content {
      protocol              = outbound_rule.value
      port_range            = "1-65535"
      destination_addresses = ["0.0.0.0/0", "::/0"]
    }
  }
}
# Public address of the redroid droplet, for SSH and adb.
output "redroid_ipv4" {
  description = "Public IPv4 address of the redroid droplet."
  value       = digitalocean_droplet.redroid.ipv4_address
}
# Ready-to-paste `adb connect` target (public IPv4 plus the published adb port).
output "redroid_adb" {
  description = "adb-over-TCP endpoint of the redroid container (<ipv4>:5555); reachable only from admin_ips per the redroid firewall."
  value       = "${digitalocean_droplet.redroid.ipv4_address}:5555"
}
# Usage after apply:
# adb connect <redroid_ipv4>:5555
# (from plum that has the mr-number-lookup tool)
# Then: python3 .../mr_lookup.py --phone "+1555..." --client-id XXX --device <redroid_ipv4>:5555
# (or export MR_NUMBER_DEVICE=... )
# NOTE (2026-06-27, RESOLVED: redroid on DO WORKS):
# The first attempt's user_data never installed linux-modules-extra-$(uname -r) (the package that provides
# binder_linux.ko) and died on a boot-time DNS/apt race, then wrongly concluded "DO kernel can't load binder".
# PROVEN on a rebuilt droplet (id 580727907, 45.55.191.82, kernel 5.15.0-171-generic):
# - linux-modules-extra-5.15.0-171-generic installs; binder_linux + ashmem_linux load clean.
# - redroid Android 11 boots (dev.bootcomplete=1); abilist has arm64/armeabi (ARM Play Store apps run).
# - adb localhost:5555 -> "device"; UI control (input keyevent) works.
# This user_data now: waits for DNS + retries apt (race fix), verify-gates binder (exit 1 -> BINDER_FAILED),
# auto-restarts the container, and resets adbd post-boot so TCP adb isn't stuck "offline".
# REMAINING (manual, Quinn): sideload GApps, sign into the paid Mr. Number Google account, install
# com.mrnumber.blocker. Web console (ws-scrcpy/nginx, below) installs but was finished manually this round.
# SSH: ssh -i ~/.ssh/id_ed25519_1984 root@<ipv4> (plum key + IP already in ssh_keys/admin_ips).