From d34807d8295a1e69eaff2e75e502a0df84b4e0d7 Mon Sep 17 00:00:00 2001 From: Natalie Date: Sun, 28 Jun 2026 17:09:47 -0400 Subject: [PATCH] feat(infra): add Terraform IaC for horizontally scaled on-demand ct-forge CI runners on DO for package publishing - New module infra/terraform/ci-runners/ modeled after test-fleet (cattle, reusable golden image, 0-cost when idle). - Horizontal scale via var.runners (pool of DO droplets for concurrent publish jobs). - On-demand: scale up (e.g. 3-10) before package publish batches across the extracted repos, scale to 0 after (zero cost). - Reuses cocotte-golden image (pre-warmed node/pnpm). - cloud-init auto-downloads/registers forgejo-runner to ct-forge using PAT, sets host-mode labels (self-hosted,linux,do,ct-forge,publish). - Includes templates/publish.yml for use in the new per-package repos (with registry transform, guards, ct-forge target). - variables for forge_url, labels, registration target (supports org-level for shared package runners). - Outputs for IPs/inventory. fmt clean, basic structure verified. This enables the "publish with ondemand infra" for the 49+ package extraction while keeping the 3-orgs model. 
--- infra/terraform/ci-runners/README.md | 125 +++++++++++++++++ infra/terraform/ci-runners/cloud-init.yaml | 128 ++++++++++++++++++ infra/terraform/ci-runners/main.tf | 111 +++++++++++++++ .../ci-runners/templates/publish.yml | 100 ++++++++++++++ .../ci-runners/terraform.tfvars.example | 20 +++ infra/terraform/ci-runners/variables.tf | 104 ++++++++++++++ infra/terraform/ci-runners/versions.tf | 13 ++ 7 files changed, 601 insertions(+) create mode 100644 infra/terraform/ci-runners/README.md create mode 100644 infra/terraform/ci-runners/cloud-init.yaml create mode 100644 infra/terraform/ci-runners/main.tf create mode 100644 infra/terraform/ci-runners/templates/publish.yml create mode 100644 infra/terraform/ci-runners/terraform.tfvars.example create mode 100644 infra/terraform/ci-runners/variables.tf create mode 100644 infra/terraform/ci-runners/versions.tf diff --git a/infra/terraform/ci-runners/README.md b/infra/terraform/ci-runners/README.md new file mode 100644 index 0000000..f7b3041 --- /dev/null +++ b/infra/terraform/ci-runners/README.md @@ -0,0 +1,125 @@ +# ct-forge CI Runners (on-demand, horizontally scaled on DO) + +Terraform module to provision a pool of disposable DigitalOcean droplets that act as Forgejo Actions runners for the cocottetech (ct-forge) CI, with focus on package publishing. + +## Key Features +- **Horizontally scaled**: `var.runners = N` spins N identical runners. CI jobs (publish) are load-balanced across the pool. +- **On-demand / zero-cost when idle**: Set `runners = 0` to destroy the entire fleet (matches test-fleet cattle pattern). Scale up only when doing package publishes or CI bursts. +- **DO golden image**: Boots from the existing `cocotte-golden*` Packer image (pre-baked node 20 + pnpm 9 + toolchain). Fast boot, warm for publish jobs. +- **Auto-registration**: Each runner registers itself to ct-forge at boot using a PAT (fetches one-time reg token). Labels target publish workflows. 
+- **Ephemeral-friendly**: Runners are stateless; destroy the droplets after use. Use `--ephemeral` style if extending the runner binary flags. +- **Labels**: Default `self-hosted,linux,do,ct-forge,publish`. Package repos' `.forgejo/workflows/publish.yml` use `runs-on: [self-hosted, linux, do, ct-forge, publish]`. +- **Integrates with existing infra**: Reuses `cocotte-fleet` SSH key, `cocotte:dev` project, golden images, build user "cocotte". + +## Usage (from cocottetech root or infra dir) + +```bash +cd infra/terraform/ci-runners + +# Required +export TF_VAR_do_token="$(cat ~/.vault/do_pat_cocotte)" +export TF_VAR_forge_pat="$(cat ~/.vault/cocotte_forge_pat)" # PAT with runner registration perms on ct-forge + +# Optional overrides +export TF_VAR_forge_url="http://forge.ct.uvlava.com:3000" # or current IP +export TF_VAR_runners=3 + +terraform init +terraform plan +terraform apply -auto-approve + +# After publishes/CI done +terraform apply -var="runners=0" -auto-approve +``` + +The module will: +- Resolve latest golden image. +- Create N droplets. +- Each runs cloud-init that installs/registers the forgejo-runner (using the PAT), starts the service. +- Runners appear in the ct-forge UI under Actions > Runners. + +## Workflow Integration (in the extracted package repos) + +In each new small package repo (after extraction), add `.forgejo/workflows/publish.yml` (or copy from existing small ones like speech-synthesis-mcp): + +```yaml +name: publish + +on: + push: + branches: [main] + workflow_dispatch: + +jobs: + publish: + runs-on: [self-hosted, linux, do, ct-forge, publish] + steps: + - uses: actions/checkout@v4 + - name: setup node/pnpm + # the runner image has it warm + run: node --version && pnpm --version + - name: install & publish + # ... pnpm install, build, npm publish with @lilith:registry to ct-forge registry + # Use secrets for tokens if needed. +``` + +The job will be picked by one of the live ct-forge runners on DO. 
+ +## Variables (see variables.tf) + +- `runners`: the horizontal scale lever (0 for on-demand teardown). +- `forge_pat`: critical for registration (sensitive). +- `labels`, `runner_name_prefix`, `registration_repo` (use org-level if your ct-forge supports shared runners for all packages). +- Size/region tuned for package jobs (lightweight TS publish). + +## Outputs + +- `runner_ips`, `runner_targets`: for debugging/inventory. +- Inventory file rendered to `.local/ci-runners/inventory` (like test-fleet). + +## Cloud-init Details + +See `cloud-init.yaml`: it handles download of forgejo-runner, fetching reg token via PAT + forge API, writing host-mode config, systemd user service, start. + +It assumes the golden image has the "cocotte" user with linger, basic tools. + +If the runner binary is not in the golden image, it is downloaded at boot (acceptable for on-demand). + +## Packer / Golden Image + +This module **reuses** the existing `cocotte-golden` image (see `../packer/`). + +If you want a dedicated lighter "runner-golden" (no full monorepo clone, just toolchain + runner binary pre-baked), run a separate packer build with a "runner" mode in provision.sh and update `golden_name_match`. + +For now, the full golden is fine (extra disk is cheap, and pnpm is pre-warmed if a publish job needs local deps). + +## Horizontal Scaling + On-Demand Publishing + +- Normal: `runners=2` or 3 (always a small hot pool). +- Heavy publish day (many package repos triggering publishes): `runners=8` or 10. +- After: scale to 0. +- Cost: only pay for droplets while runners > 0. Perfect for "publish with ondemand infra". +- The Forgejo server (ct-forge) sees the runners come and go; jobs queue if no runners available (scale up then). + +## Security / Secrets + +- `forge_pat` is used only at boot to fetch the one-time registration token, but it reaches the droplet inside the rendered cloud-init user-data, which stays readable on the droplet for its lifetime. Use a narrowly scoped PAT, rotate it, and do not store it anywhere else on the runners. +- Use DO project + tags for billing isolation. +- Runners get the fleet SSH key (read-only for dispatch if needed). 
+- For package publish tokens (registry), use Forgejo repo/org secrets (injected into the job env, not the runner VM). + +## Extending for True Ephemeral (Advanced) + +If you want per-job VMs (true one-shot): +- Extend cloud-init to register with `--ephemeral` flag (if supported in the runner version). +- Have an external autoscaler (small always-on VM or DO Function) that polls the ct-forge pending jobs API and provisions/tears down droplets on demand. +- The current design (scale via terraform) is the simple, auditable, "terraform iac" approach matching the rest of the cocottetech DO fleet. + +## Related + +- test-fleet: sibling module for test/distributed work. +- packer/golden-image: the image these runners boot from. +- The package extraction plan: each small package repo will have its publish workflow targeting these labels. +- LP's `setup-forgejo-host.sh`: reference for the registration/install logic (ported to cloud-init). + +Apply this module, scale as needed for publishes, enjoy clean on-demand horizontally scaled ct-forge runners on DO. diff --git a/infra/terraform/ci-runners/cloud-init.yaml b/infra/terraform/ci-runners/cloud-init.yaml new file mode 100644 index 0000000..688143c --- /dev/null +++ b/infra/terraform/ci-runners/cloud-init.yaml @@ -0,0 +1,128 @@ +#cloud-config +# cloud-init for ct-forge CI runners on DO. +# Boots from cocotte-golden image (has node/pnpm/toolchain). +# Installs and registers forgejo-runner as user service for the cocottetech forge. +# Labels: self-hosted,linux,do,ct-forge,publish (for package publish jobs). +# Ephemeral-friendly: runners can be torn down after use (scale via terraform). 
+ +runcmd: + - | + set -euo pipefail # NOTE(review): cloud-init runs string runcmd entries with sh; confirm the image's sh accepts "-o pipefail" + echo "=== ct-forge runner cloud-init starting ===" + + # Config from template (injected by terraform at apply time); shell-style ":-" fallbacks are not valid inside a Terraform interpolation, so defaults come from variables.tf (assumes main.tf passes all seven keys to templatefile - confirm) + FORGE_URL="${forge_url}" + FORGE_PAT="${forge_pat}" + RUNNER_NAME="${runner_name}" + LABELS="${labels}" + RUNNER_VERSION="${runner_version}" + BUILD_USER="${build_user}" + REGISTRATION_REPO="${registration_repo}" + + case "$REGISTRATION_REPO" in */*) REG_SCOPE="repos" ;; *) REG_SCOPE="orgs" ;; esac # owner/repo -> repo-level token endpoint, bare name -> org-level endpoint + if [ -n "$FORGE_PAT" ]; then + echo "Fetching registration token for $REGISTRATION_REPO ..." + REG_TOKEN=$(curl -sf -X POST \ + -H "Authorization: token $FORGE_PAT" \ + -H "Content-Type: application/json" \ + "$FORGE_URL/api/v1/$REG_SCOPE/$REGISTRATION_REPO/actions/runners/registration-token" \ + | python3 -c 'import sys,json; print(json.load(sys.stdin).get("token", ""))' ) || REG_TOKEN="" # do not let set -e abort here; the check below prints the error + if [ -z "$REG_TOKEN" ]; then + echo "ERROR: failed to get registration token. Check PAT permissions for actions runners." + exit 1 + fi + else + echo "ERROR: FORGE_PAT required for registration." + exit 1 + fi + + # Install forgejo-runner binary (if not already in golden) + ARCH=$(uname -m) + case "$ARCH" in + x86_64) GOARCH="amd64" ;; + aarch64) GOARCH="arm64" ;; + *) echo "Unsupported arch $ARCH"; exit 1 ;; + esac + # Note: the doubled dollar sign before a brace is the TF template escape; the shell on the VM receives a normal single-dollar expansion + RUNNER_URL="https://code.forgejo.org/forgejo/runner/releases/download/$${RUNNER_VERSION}/forgejo-runner-$${RUNNER_VERSION#v}-linux-$${GOARCH}" + BIN_DIR="/usr/local/bin" + if ! command -v forgejo-runner >/dev/null 2>&1; then + echo "Downloading forgejo-runner $${RUNNER_VERSION} ..." 
+ curl -fsSL "$RUNNER_URL" -o /tmp/forgejo-runner + chmod +x /tmp/forgejo-runner + mv /tmp/forgejo-runner "$BIN_DIR/forgejo-runner" + fi + forgejo-runner --version + + # Workdir as the build user + WORKDIR="/home/$BUILD_USER/.local/share/forgejo-runner" + mkdir -p "$WORKDIR" + chown "$BUILD_USER:$BUILD_USER" "$WORKDIR" + + # Write config.yaml (host mode for direct execution, no docker socket) + # Use unquoted heredoc so $${LABELS} etc expand on the VM (after TF has already subbed the top-level ${labels}) + cat > "$WORKDIR/config.yaml" < "/home/$BUILD_USER/.config/systemd/user/forgejo-runner.service" < .npmrc + echo "//forge.ct.uvlava.com:4873/:_authToken=\${NPM_TOKEN}" >> .npmrc + # Or use IP during bootstrap: http://134.199.243.61:4873/ + + - name: Transform workspace/file: dependencies to * (for clean registry publish) + run: | + node -e ' + const fs = require("fs"); + if (fs.existsSync("package.json")) { + const pkg = JSON.parse(fs.readFileSync("package.json", "utf8")); + const transform = (deps) => { + if (!deps) return deps; + for (const [name, version] of Object.entries(deps)) { + if (typeof version === "string" && (version.startsWith("workspace:") || version.startsWith("file:"))) { + deps[name] = "*"; + } + } + return deps; + }; + pkg.dependencies = transform(pkg.dependencies); + pkg.devDependencies = transform(pkg.devDependencies); + pkg.peerDependencies = transform(pkg.peerDependencies); + fs.writeFileSync("package.json", JSON.stringify(pkg, null, 2)); + } + ' + + - name: Install + run: pnpm install --no-frozen-lockfile --prefer-offline + + - name: Validate / build if flagged + run: | + pkg_name=$(node -p "require('./package.json').name") + should_build=$(node -p "require('./package.json')._?.build === true || false") + echo "Package: $pkg_name, build: $should_build" + if [ "$should_build" = "true" ] && grep -q '"build"' package.json; then + pnpm run build 2>&1 | tail -10 + fi + + - name: Publish (if flagged and not already present) + run: | + 
+ pkg_name=$(node -p "require('./package.json').name") + pkg_version=$(node -p "require('./package.json').version") + should_publish=$(node -p "require('./package.json')._?.publish === true || false") + registry_flag=$(node -p "require('./package.json')._?.registry || 'forgejo'") + + if [ "$registry_flag" != "forgejo" ] && [ "$registry_flag" != "cocotte-forge" ]; then + echo "Skipping: not marked for forgejo registry" + exit 0 + fi + + if [ "$should_publish" != "true" ]; then + echo "Skipping: _ .publish not true" + exit 0 + fi + + if [ -n "$(npm view "$pkg_name@$pkg_version" version --registry http://forge.ct.uvlava.com:4873/ 2>/dev/null || true)" ]; then # test the output, not the exit code: some npm releases exit 0 with no output for a missing version + echo "Already published $pkg_name@$pkg_version, skipping" + else + echo "Publishing $pkg_name@$pkg_version ..." + npm publish --access public --registry http://forge.ct.uvlava.com:4873/ # --no-git-checks is a pnpm flag, not an npm publish option + fi diff --git a/infra/terraform/ci-runners/terraform.tfvars.example b/infra/terraform/ci-runners/terraform.tfvars.example new file mode 100644 index 0000000..597c894 --- /dev/null +++ b/infra/terraform/ci-runners/terraform.tfvars.example @@ -0,0 +1,20 @@ +# Example for ct-forge on-demand CI runners. +# Copy to terraform.tfvars or use -var-file. + +# DO +do_token = "..." # from env or vault; sensitive + +# Scale (horizontal + on-demand) +runners = 2 # 0 to tear down completely + +# Forge (ct-forge / cocottetech forge) +forge_url = "http://forge.ct.uvlava.com:3000" +forge_pat = "..." 
# PAT with runner reg rights (sensitive) + +# Tuning +runner_name_prefix = "ct-forge-do" +labels = "self-hosted,linux,do,ct-forge,publish" +size = "s-2vcpu-4gb" # lightweight for package publish jobs + +# Registration target in owner/repo form (same value as the variables.tf default) +registration_repo = "cocottetech/cocottetech" # or another owner/repo the PAT can administer diff --git a/infra/terraform/ci-runners/variables.tf b/infra/terraform/ci-runners/variables.tf new file mode 100644 index 0000000..1837ccd --- /dev/null +++ b/infra/terraform/ci-runners/variables.tf @@ -0,0 +1,104 @@ +variable "do_token" { + description = "DigitalOcean API token (Read/Write). Export as TF_VAR_do_token." + type = string + sensitive = true +} + +variable "runners" { + description = <<-EOT + Number of on-demand CI runners (horizontally scaled pool). + 0 = zero cost, all runners destroyed. + Set to N (e.g. 2-5 for normal, 10+ during heavy package publish batches) to horizontally scale. + Each runner is a disposable DO droplet registered to ct-forge. + The Forgejo CI (package publish workflows) will pick from the pool using labels. + EOT + type = number + default = 0 + + validation { + condition = var.runners >= 0 && var.runners <= 20 && floor(var.runners) == var.runners + error_message = "Must be a whole number between 0 and 20 (sanity / account limits)." + } +} + +variable "region" { + type = string + default = "nyc3" +} + +variable "size" { + description = "Droplet size. Small for package publish (TS/JS mostly): s-2vcpu-4gb or s-4vcpu-8gb. Larger if needed for heavy builds." + type = string + default = "s-4vcpu-8gb" +} + +variable "base_image" { + description = "Override; leave empty to use latest golden (cocotte-golden*). Use stock ubuntu for bootstrap." 
+ type = string + default = "" +} + +variable "golden_name_match" { + type = string + default = "cocotte-golden" +} + +variable "ssh_key_name" { + type = string + default = "cocotte-fleet" +} + +variable "name" { + type = string + default = "ct-forge-ci-runner" +} + +variable "runner_name_prefix" { + description = "Prefix for runner names shown in Forgejo UI." + type = string + default = "ct-forge-do" +} + +variable "labels" { + description = "Comma or colon separated labels for the runners (used in workflow runs-on)." + type = string + default = "self-hosted,linux,do,ct-forge,publish" +} + +variable "forge_url" { + description = "ct-forge (cocottetech forge) base URL, e.g. http://forge.ct.uvlava.com:3000 or the IP." + type = string +} + +variable "forge_pat" { + description = "Personal Access Token (PAT) for the forge (must have permission to generate runner registration tokens for the target org/repo). Sensitive." + type = string + sensitive = true +} + +variable "runner_version" { + type = string + default = "v12.8.0" +} + +variable "build_user" { + description = "User that will run the forgejo-runner service (matches the one in golden image)." + type = string + default = "cocotte" +} + +variable "registration_repo" { + description = "The repo (or org) to register runners against for token. e.g. cocottetech/cocottetech or the packages org. Use org-level if supported for shared runners across package repos." 
+ type = string + default = "cocottetech/cocottetech" +} + +variable "do_project" { + type = string + default = "cocotte:dev" +} + +variable "tags" { + type = list(string) + default = ["ci", "runner", "ct-forge", "do"] +} \ No newline at end of file diff --git a/infra/terraform/ci-runners/versions.tf b/infra/terraform/ci-runners/versions.tf new file mode 100644 index 0000000..ae264db --- /dev/null +++ b/infra/terraform/ci-runners/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_version = ">= 1.5" + required_providers { + digitalocean = { + source = "digitalocean/digitalocean" + version = "~> 2.0" + } + local = { + source = "hashicorp/local" + version = "~> 2.0" + } + } +} \ No newline at end of file