#!/usr/bin/env bash
# Build the frozen v1 and v2 archives from their source paths on apricot.
# (v0 is built separately by cache-v0.sh because its source is on NFS.)
#
# Output:   .archive/platform.{1,2}.tar.zst
# Excludes: ML model weights, training data, build artifacts, vcs cruft.
#           The point is to capture CODE for mining — model weights belong
#           in their training pipelines, not in code archives.
#
# Environment:
#   ZSTD_LEVEL  compression level passed to zstd as -<level> (default: 3)
#
# An archive that already exists is never rebuilt; remove it to rebuild.
# Archives are written to "<archive>.partial" and renamed into place only
# after tar and zstd both succeed, so a failed or interrupted run can never
# leave a truncated archive that a later run would skip as "already exists".

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
ARCHIVE_DIR="$REPO_ROOT/.archive"
mkdir -p "$ARCHIVE_DIR"

# Patterns handed to tar; each one matches at any depth of the source tree.
EXCLUDES=(
  # vcs / build
  --exclude='.git'
  --exclude='.gitlab-ci-local'
  --exclude='.playwright-mcp'
  --exclude='.turbo'
  --exclude='.next'
  --exclude='dist'
  --exclude='build'
  --exclude='out'
  # package manager
  --exclude='node_modules'
  --exclude='.pnpm-store'
  --exclude='.yarn/cache'
  --exclude='.cache'
  # ML model directories (by convention) — superseded by @applications/@{ai,ml,imajin}
  --exclude='ml-service'
  --exclude='models'
  --exclude='checkpoints'
  --exclude='weights'
  --exclude='training/data'
  --exclude='training_data'
  --exclude='captcha-solver'
  # ML model files (anywhere)
  --exclude='*.gguf'
  --exclude='*.safetensors'
  --exclude='*.bin'
  --exclude='*.ckpt'
  --exclude='*.pth'
  --exclude='*.pt'
  --exclude='*.onnx'
  --exclude='*.h5'
  # large media
  --exclude='*.mp4'
  --exclude='*.mov'
  --exclude='*.webm'
  --exclude='*.zip'
  --exclude='*.tar.gz'
  --exclude='*.tar.zst'
)

ZSTD_LEVEL="${ZSTD_LEVEL:-3}"

#######################################
# Create $ARCHIVE_DIR/<version>.tar.zst from one source directory.
# Globals:
#   ARCHIVE_DIR (read), EXCLUDES (read), ZSTD_LEVEL (read)
# Arguments:
#   $1 - archive base name, e.g. "platform.1"
#   $2 - source directory; it is stored in the archive under its basename
# Outputs:
#   Progress and skip notices on stdout, failures on stderr.
# Returns:
#   0 when the archive was built or the build was skipped (archive already
#   present, or source directory missing); 1 when tar, zstd or the final
#   rename failed. On failure no file is left at the final archive path.
#######################################
build_archive() {
  local version="$1"
  local src_dir="$2"
  local src_parent
  local src_name
  src_parent="$(dirname "$src_dir")"
  src_name="$(basename "$src_dir")"
  local out="$ARCHIVE_DIR/${version}.tar.zst"
  # Suffix keeps the in-progress file out of the final "*.tar.zst" listing.
  local tmp="${out}.partial"

  if [[ -f "$out" ]]; then
    echo "skip ${version}: ${out} already exists (remove to rebuild)"
    return 0
  fi
  if [[ ! -d "$src_dir" ]]; then
    echo "skip ${version}: source not found at ${src_dir}"
    return 0
  fi

  echo
  echo "==> building ${version} ← ${src_dir}"
  echo " output: ${out}"

  # Do not depend on the top-level mkdir having run for this directory.
  mkdir -p "$ARCHIVE_DIR"
  # A killed earlier run may have left a partial file; zstd will not
  # overwrite an existing output file when its input comes from stdin.
  rm -f -- "$tmp"

  local before
  before=$(date +%s)

  # Checked explicitly instead of relying on `set -e`, which is ignored when
  # this function is called from a conditional (`build_archive ... || ...`).
  if ! tar "${EXCLUDES[@]}" -cf - -C "$src_parent" "$src_name" \
    | zstd -"$ZSTD_LEVEL" -T0 -o "$tmp"; then
    rm -f -- "$tmp"
    echo "error: building ${version} failed; no archive written" >&2
    return 1
  fi

  # Publish only a complete archive; rename within one directory is atomic.
  if ! mv -f -- "$tmp" "$out"; then
    rm -f -- "$tmp"
    echo "error: could not move ${tmp} to ${out}" >&2
    return 1
  fi

  local after
  after=$(date +%s)
  local size
  size=$(du -h "$out" | cut -f1)
  echo " done in $((after - before))s, size: ${size}"
}

build_archive "platform.1" "$HOME/Code/@projects/@lilith/lilith-platform"
build_archive "platform.2" "$HOME/Code/@projects/@lilith/lilith-platform.live"

echo
echo "built archives:"
# Best-effort listing: the glob matches nothing when every build was skipped.
ls -lh "$ARCHIVE_DIR"/*.tar.zst 2>/dev/null || true