scripts(scripts-scripts/): 🔨 Update archive build script to improve distribution archive generation logic
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
94dc5dad05
commit
ae3d0cd909
1 changed files with 52 additions and 11 deletions
|
|
@ -2,9 +2,10 @@
|
|||
# Build the frozen v0/v1/v2 archives from their source paths on apricot.
|
||||
# Run this ONCE (or again to refresh the archives). Output: .archive/platform.{0,1,2}.tar.zst
|
||||
#
|
||||
# Excludes: .git, node_modules, .turbo, dist, build, .next — these bloat
|
||||
# the archive without adding mining value. .git is excluded everywhere
|
||||
# because v0's .git alone is 229G.
|
||||
# These are CODE archives meant for mining patterns. ML model weights,
|
||||
# training data, large media, and CAPTCHA blobs are excluded — they're
|
||||
# not what we mine from. If you need a specific weight, fetch it from
|
||||
# its original training pipeline, not from these archives.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
|
@ -12,19 +13,59 @@ REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
|||
ARCHIVE_DIR="$REPO_ROOT/.archive"
|
||||
mkdir -p "$ARCHIVE_DIR"
|
||||
|
||||
# Excludes applied to every tarball
|
||||
# Excludes applied to every tarball.
|
||||
#
|
||||
# Layer 1 — vcs/build artifacts (always junk for code-mining)
|
||||
# Layer 2 — package manager caches (rebuildable)
|
||||
# Layer 3 — ML model directories (by convention name)
|
||||
# Layer 4 — ML model files (by extension)
|
||||
# Layer 5 — large media and nested archives (by extension); NOTE(review): was "known bloat dirs found in v1" — confirm
|
||||
# Exclude patterns applied to every tarball, grouped by the reason for
# exclusion. Each element is one complete `--exclude=PATTERN` option word;
# presumably the array is expanded as "${EXCLUDES[@]}" on the tar command
# line (the call site is outside this block — confirm).
#
# Every pattern appears exactly once; listing a pattern twice adds nothing.
#
# NOTE(review): with GNU tar's default (unanchored) exclude matching, bare
# names such as 'models', 'build', 'out' and 'dist' match an entry of that
# name at any depth, including ordinary source directories (e.g. ORM
# "models"). Confirm that this is acceptable for code mining.
EXCLUDES=(
  # vcs / build
  --exclude='.git'
  --exclude='.gitlab-ci-local'
  --exclude='.playwright-mcp'
  --exclude='.turbo'
  --exclude='.next'
  --exclude='dist'
  --exclude='build'
  --exclude='out'

  # package manager
  --exclude='node_modules'
  --exclude='.pnpm-store'
  --exclude='.yarn/cache'
  --exclude='.cache'

  # ML model directories (by convention)
  --exclude='ml-service'
  --exclude='models'
  --exclude='checkpoints'
  --exclude='weights'
  --exclude='training/data'
  --exclude='training_data'
  --exclude='captcha-solver'

  # ML model files (anywhere)
  --exclude='*.gguf'
  --exclude='*.safetensors'
  --exclude='*.bin'
  --exclude='*.ckpt'
  --exclude='*.pth'
  --exclude='*.pt'
  --exclude='*.onnx'
  --exclude='*.h5'

  # large media and nested archives not relevant for code mining
  --exclude='*.mp4'
  --exclude='*.mov'
  --exclude='*.webm'
  --exclude='*.zip'
  --exclude='*.tar.gz'
  --exclude='*.tar.zst'
)
|
||||
|
||||
# zstd level (-3 is fast/balanced; -19 max compression but slow)
|
||||
# zstd compression level (3 = fast; 19 = max, slow). Override via ZSTD_LEVEL; default 3.
|
||||
ZSTD_LEVEL="${ZSTD_LEVEL:-3}"
|
||||
|
||||
build_archive() {
|
||||
|
|
@ -68,7 +109,7 @@ build_archive "platform.0" \
|
|||
build_archive "platform.1" \
|
||||
"$HOME/Code/@projects/@lilith/lilith-platform"
|
||||
|
||||
# v2 — lilith-platform.live (Quinn-personal, currently in prod — do not touch original)
|
||||
# v2 — lilith-platform.live (Quinn-personal — production; read-only)
|
||||
build_archive "platform.2" \
|
||||
"$HOME/Code/@projects/@lilith/lilith-platform.live"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue