atlilith/scripts/build-archives.sh
autocommit 05f2666088 chore(git): 🔧 Enforce LF line endings and mark binary files in .gitattributes
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-05-16 21:33:57 -07:00

101 lines
2.6 KiB
Bash
Executable file

#!/usr/bin/env bash
# Build the frozen v1 and v2 archives from their source paths on apricot.
# (v0 is built separately by cache-v0.sh because its source is on NFS.)
#
# Output: .archive/platform.{1,2}.tar.zst
# Excludes: ML model weights, training data, build artifacts, vcs cruft.
# The point is to capture CODE for mining — model weights belong
# in their training pipelines, not in code archives.
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# The repository root is the parent of the directory containing this script;
# all archives are written to a hidden .archive directory beneath it.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
ARCHIVE_DIR="${REPO_ROOT}/.archive"
mkdir -p "${ARCHIVE_DIR}"
# tar --exclude options shared by every archive build. Patterns are grouped
# by the reason they are left out; order within the array is preserved.
EXCLUDES=()

# vcs / build
EXCLUDES+=("--exclude=.git" "--exclude=.gitlab-ci-local" "--exclude=.playwright-mcp")
EXCLUDES+=("--exclude=.turbo" "--exclude=.next")
EXCLUDES+=("--exclude=dist" "--exclude=build" "--exclude=out")

# package manager
EXCLUDES+=("--exclude=node_modules" "--exclude=.pnpm-store")
EXCLUDES+=("--exclude=.yarn/cache" "--exclude=.cache")

# ML model directories (by convention) — superseded by @applications/@{ai,ml,imajin}
EXCLUDES+=("--exclude=ml-service" "--exclude=models" "--exclude=checkpoints")
EXCLUDES+=("--exclude=weights" "--exclude=training/data" "--exclude=training_data")
EXCLUDES+=("--exclude=captcha-solver")

# ML model files (anywhere); quoted so the shell passes the globs to tar as-is
EXCLUDES+=("--exclude=*.gguf" "--exclude=*.safetensors" "--exclude=*.bin")
EXCLUDES+=("--exclude=*.ckpt" "--exclude=*.pth" "--exclude=*.pt")
EXCLUDES+=("--exclude=*.onnx" "--exclude=*.h5")

# large media
EXCLUDES+=("--exclude=*.mp4" "--exclude=*.mov" "--exclude=*.webm")
EXCLUDES+=("--exclude=*.zip" "--exclude=*.tar.gz" "--exclude=*.tar.zst")
# Compression level handed to zstd; honours a non-empty ZSTD_LEVEL from the
# environment and otherwise falls back to 3.
: "${ZSTD_LEVEL:=3}"
#######################################
# Pack one source tree into $ARCHIVE_DIR/<version>.tar.zst.
#
# The compressed stream is first written to "<archive>.partial" and renamed
# to its final name only after the tar | zstd pipeline succeeded. A failed or
# interrupted build therefore never leaves a truncated file under the final
# name, which the "already exists" check would otherwise treat as a finished
# archive on every later run.
#
# Globals:
#   ARCHIVE_DIR (read) - directory that receives the archive
#   EXCLUDES    (read) - array of tar --exclude options
#   ZSTD_LEVEL  (read) - zstd compression level
# Arguments:
#   $1 - archive name without extension (e.g. "platform.1")
#   $2 - path of the source directory to archive
# Outputs:
#   Progress and skip messages on stdout; a failure message on stderr.
# Returns:
#   0 when the archive was built or the build was skipped (archive already
#   present, or source directory missing); non-zero when the pipeline or the
#   final rename fails. A tar failure is only detected when the caller has
#   pipefail enabled.
#######################################
build_archive() {
  local version="$1"
  local src_dir="$2"
  local src_parent
  local src_name
  src_parent="$(dirname "$src_dir")"
  src_name="$(basename "$src_dir")"
  local out="$ARCHIVE_DIR/${version}.tar.zst"
  local partial="${out}.partial"

  if [ -f "$out" ]; then
    echo "skip ${version}: ${out} already exists (remove to rebuild)"
    return
  fi
  if [ ! -d "$src_dir" ]; then
    echo "skip ${version}: source not found at ${src_dir}"
    return
  fi

  echo
  echo "==> building ${version} from ${src_dir}"
  echo " output: ${out}"

  local before
  before=$(date +%s)

  # Drop any leftover from an earlier aborted run; zstd refuses to overwrite
  # an existing output file when it cannot prompt for confirmation.
  rm -f -- "$partial"

  # Archive relative to the parent directory so paths inside the tarball start
  # with the tree's own directory name rather than its absolute location.
  if ! tar "${EXCLUDES[@]}" -cf - -C "$src_parent" "$src_name" \
    | zstd -"$ZSTD_LEVEL" -T0 -o "$partial"; then
    rm -f -- "$partial"
    echo "error: building ${version} failed; no archive written" >&2
    return 1
  fi

  # Publish the finished archive under its final name.
  mv -f -- "$partial" "$out" || return

  local after
  after=$(date +%s)
  local size
  size=$(du -h "$out" | cut -f1)
  echo " done in $((after - before))s, size: ${size}"
}
# Build both archives in order; build_archive itself skips a version whose
# archive already exists or whose source directory is missing.
build_archive "platform.1" "${HOME}/Code/@projects/@lilith/lilith-platform"
build_archive "platform.2" "${HOME}/Code/@projects/@lilith/lilith-platform.live"

# Summarize what is on disk. The listing is informational only: when no
# archive exists the glob matches nothing and the ls failure is ignored.
printf '\n%s\n' "built archives:"
ls -lh "${ARCHIVE_DIR}"/*.tar.zst 2>/dev/null || :