atlilith/scripts/cache-v0.sh

85 lines
2.1 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# Build a local zstd cache of v0 (egirl-platform) from its NFS source.
# Run once on apricot — afterward, ./scripts/extract-archive.sh platform.0 is fast.
#
# Source: /mnt/bigdisk/_/last-linux-backup/applications/src/@egirl/egirl-platform/
# Output: ~/.cache/atlilith-archives/platform.0.tar.zst
#
# Excludes ML weights, build artifacts, etc. — see EXCLUDES below.
# Strict mode: stop on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Location of the v0 tree (NFS) and of the compressed cache that is produced.
SRC="/mnt/bigdisk/_/last-linux-backup/applications/src/@egirl/egirl-platform"
CACHE_DIR="${HOME}/.cache/atlilith-archives"
OUT="${CACHE_DIR}/platform.0.tar.zst"

# zstd compression level; taken from the environment when set and non-empty,
# otherwise 3.
: "${ZSTD_LEVEL:=3}"

# Bare patterns to leave out of the archive. Each entry is turned into a
# `--exclude=<pattern>` argument below; glob patterns stay quoted so tar,
# not the shell, interprets them.
_exclude_patterns=(
  # vcs / build
  '.git'
  '.gitlab-ci-local'
  '.playwright-mcp'
  '.turbo'
  '.next'
  'dist'
  'build'
  'out'

  # package manager
  # NOTE(review): 'nm_marker_replace' looks like a placeholder that may have
  # been meant to be 'node_modules' — confirm against the upstream script.
  'nm_marker_replace'
  '.pnpm-store'
  '.yarn/cache'
  '.cache'

  # ML model directories (by convention)
  'ml-service'
  'models'
  'checkpoints'
  'weights'
  'training/data'
  'training_data'
  'captcha-solver'

  # ML model files (anywhere)
  '*.gguf'
  '*.safetensors'
  '*.bin'
  '*.ckpt'
  '*.pth'
  '*.pt'
  '*.onnx'
  '*.h5'

  # large media
  '*.mp4'
  '*.mov'
  '*.webm'
  '*.zip'
  '*.tar.gz'
  '*.tar.zst'
)

# Prefix every pattern with `--exclude=`, preserving order and element count.
EXCLUDES=("${_exclude_patterns[@]/#/--exclude=}")
unset _exclude_patterns
# Preflight: refuse to run unless the source directory is actually present.
if [[ ! -d "$SRC" ]]; then
  echo "error: source not found: $SRC" >&2
  exit 1
fi

mkdir -p -- "$CACHE_DIR"

# An existing cache is only replaced after explicit confirmation; anything
# other than an answer starting with y/Y keeps it and exits successfully.
if [[ -f "$OUT" ]]; then
  # Diagnostics go to stderr, matching the "error:" message above.
  echo "warning: cache exists: $OUT" >&2

  # `read` returns non-zero at EOF (stdin closed, or piped input without a
  # trailing newline). Under `set -e` that would terminate the script silently
  # with status 1, so the status is ignored and whatever was read (possibly
  # nothing, i.e. the default "N") is used.
  ans=""
  read -rp "rebuild? [y/N] " ans || true

  if [[ ! "$ans" =~ ^[yY] ]]; then
    echo "aborted"
    exit 0
  fi
  rm -f -- "$OUT"
fi
echo "building v0 cache from $SRC (NFS)"
echo "$OUT"

# Stream into a temporary sibling of $OUT and rename it into place only after
# both tar and zstd have succeeded. A failed or interrupted run therefore never
# leaves a truncated archive at $OUT that a later run would mistake for a
# valid cache. The sibling lives in the same directory, so the rename stays on
# one filesystem.
tmp_out="${OUT}.partial.$$"
# Remove the partial file on any exit path; after a successful rename the file
# no longer exists and this is a no-op.
trap 'rm -f -- "$tmp_out"' EXIT

before=$(date +%s)
# Archive the tree relative to its parent directory so the archive contains a
# single top-level directory named after the source's basename.
tar "${EXCLUDES[@]}" -cf - -C "$(dirname "$SRC")" "$(basename "$SRC")" \
  | zstd -"$ZSTD_LEVEL" -T0 -o "$tmp_out"
after=$(date +%s)

mv -f -- "$tmp_out" "$OUT"

size=$(du -h "$OUT" | cut -f1)
echo "done in $((after - before))s, size: $size"