# Source: lilith-platform.live/tooling/scripts/watermark/batch.py
# (134 lines, 4.5 KiB, Python)

"""
Batch the Kuromi techno watermark across the whole quinn.www photo library.
Deterministic + idempotent: re-running reproduces byte-for-byte-equivalent
outputs and never touches inputs. Sources each image from its CLEAN origin
(see LEDGER.md) so the new mark never stacks on the old white pipeline mark.
Safety:
- Reads only from photos/ (+ originals/); writes only to photos-watermarked/.
- Snapshots a sha256 manifest of every original BEFORE and AFTER the run and
asserts they are byte-identical. Aborts loudly on any drift.
Usage:
python3 batch.py # full run (after Quinn approves the preview)
python3 batch.py --check # only run the originals integrity check
"""
from __future__ import annotations
import hashlib
import os
import sys
from PIL import Image
from watermark_lib import WatermarkStyle, render_watermark
# Repository root: three directory levels above the folder that holds this script.
REPO = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
# Published photo directory of the quinn.www deployment (an input tree).
PUB = os.path.join(REPO, "deployments/@domains/quinn.www/root/public/photos")
# Directory of clean master images, consulted for named-theme bases (an input tree).
ORIG = os.path.join(REPO, "users/transquinnftw/originals")
# Output directory: a sibling of PUB ("photos-watermarked"), so writes never land inside PUB.
OUT = PUB + "-watermarked"
STYLE = WatermarkStyle() # Orbitron + plate + glow + stroke (approved default)
# quinn-* batch + png illustrations are clean in PUB; named-theme uses masters.
# Base names starting with this prefix are sourced directly from PUB.
QUINN_PREFIX = "quinn-"
# Base names that are PNG illustrations; sourced from PUB and written back out as PNG.
PNG_BASES = {"duo-session", "specialties-solo", "destinations-travel"}
def sha256(path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed in 1 MiB pieces so that
    large images are never loaded into memory in one go.
    """
    block_size = 1 << 20  # 1 MiB per read
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:  # empty bytes object signals end of file
                break
            digest.update(block)
    return digest.hexdigest()
def manifest(root: str) -> dict[str, str]:
    """Map every file under *root* to its SHA-256 hex digest.

    The tree is walked recursively with ``os.walk``. Keys are file paths
    relative to *root*; values are the digests computed by ``sha256``.
    """
    # Lazily enumerate full paths in os.walk order, then hash each one.
    file_paths = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root)
        for name in names
    )
    return {os.path.relpath(path, root): sha256(path) for path in file_paths}
def clean_source(base: str, ext: str) -> str:
    """Return the clean (un-watermarked) source path for a published base name.

    Resolution order:

    1. Bases listed in ``PNG_BASES`` map to ``<PUB>/<base>.png``.
    2. Bases starting with ``QUINN_PREFIX`` map to a JPEG in ``PUB``.
    3. Any other base must have a JPEG master in ``ORIG``.

    For JPEG sources ``.jpeg`` is preferred and ``.jpg`` is the fallback,
    because ``published_bases`` accepts both spellings when it scans ``PUB``.

    Args:
        base: File name without extension, as returned by ``published_bases``.
        ext: Output raster extension chosen by the caller. It is not consulted
            here; it is kept so the call signature stays unchanged.

    Returns:
        The path of the image to read.

    Raises:
        FileNotFoundError: If a named-theme base has no master in ``ORIG``.
    """
    jpeg_suffixes = (".jpeg", ".jpg")

    if base in PNG_BASES:
        return os.path.join(PUB, f"{base}.png")

    if base.startswith(QUINN_PREFIX):
        for suffix in jpeg_suffixes:
            candidate = os.path.join(PUB, f"{base}{suffix}")
            if os.path.exists(candidate):
                return candidate
        # Neither spelling exists: keep returning the historical ".jpeg" path
        # so the caller fails on open exactly as it did before.
        return os.path.join(PUB, f"{base}.jpeg")

    for suffix in jpeg_suffixes:
        master = os.path.join(ORIG, f"{base}{suffix}")
        if os.path.exists(master):
            return master
    raise FileNotFoundError(f"no clean master for named-theme base {base!r}")
def published_bases() -> list[str]:
    """List the sorted, de-duplicated stems of top-level raster files in PUB.

    Only regular files directly inside ``PUB`` are considered, so
    sub-directories are skipped. A file counts as raster when its extension,
    compared case-insensitively, is ``.jpeg``, ``.jpg`` or ``.png``; other
    extensions such as ``.webp`` are ignored.
    """
    raster_suffixes = (".jpeg", ".jpg", ".png")
    stems = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(PUB)
        if os.path.isfile(os.path.join(PUB, entry))
        and os.path.splitext(entry)[1].lower() in raster_suffixes
    }
    return sorted(stems)
def save_outputs(wm: Image.Image, base: str, raster_ext: str) -> None:
    """Write the watermarked image *wm* into OUT as a raster file plus a WebP.

    Args:
        wm: The already-watermarked image to save.
        base: Output file name without extension.
        raster_ext: Extension of the primary output. ``".png"`` is saved with
            ``optimize=True``; anything else is saved with ``quality=95`` and
            ``subsampling=0``.

    A ``<base>.webp`` companion (``quality=92``, ``method=6``) is always
    written after the primary raster file.
    """
    os.makedirs(OUT, exist_ok=True)

    # Pick the encoder options for the primary raster file up front.
    if raster_ext == ".png":
        raster_options = {"optimize": True}
    else:
        raster_options = {"quality": 95, "subsampling": 0}

    raster_target = os.path.join(OUT, f"{base}{raster_ext}")
    webp_target = os.path.join(OUT, f"{base}.webp")

    wm.save(raster_target, **raster_options)
    wm.save(webp_target, quality=92, method=6)
def _manifest_drift(label: str, before: dict[str, str], after: dict[str, str]) -> list[str]:
    """Describe every difference between two checksum manifests of one tree.

    Entries whose digest differs, or that are missing from *after*, are
    reported as ``"<label> changed: <name>"``. Entries present only in
    *after* are reported as ``"<label> new file: <name>"``.
    """
    drift = [
        f"{label} changed: {name}"
        for name, digest in before.items()
        if after.get(name) != digest
    ]
    drift.extend(f"{label} new file: {name}" for name in after if name not in before)
    return drift


def main() -> None:
    """Run the watermark batch and verify that the input trees were untouched.

    Steps:

    1. Snapshot SHA-256 manifests of ``PUB`` and ``ORIG``.
    2. Unless ``--check`` is in ``sys.argv``, watermark every published base
       and write the results to ``OUT``.
    3. Snapshot both trees again and compare with step 1. Any changed,
       missing or newly added file in either tree is listed and the process
       exits with status 1.

    NOTE(review): with ``--check`` nothing runs between the two snapshots, so
    the comparison only covers changes made during those back-to-back scans.
    """
    check_only = "--check" in sys.argv

    print("[1/3] snapshotting original checksums (photos/ + originals/) ...")
    before_pub = manifest(PUB)
    before_orig = manifest(ORIG)

    if not check_only:
        bases = published_bases()
        print(f"[2/3] watermarking {len(bases)} bases -> {OUT}")
        for i, base in enumerate(bases, 1):
            raster_ext = ".png" if base in PNG_BASES else ".jpeg"
            src = clean_source(base, raster_ext)
            # Keep the source open until the outputs are written, in case the
            # rendered image still depends on the source file's data.
            with Image.open(src) as im:
                wm = render_watermark(im, STYLE)
                save_outputs(wm, base, raster_ext)
            print(f" ({i:>3}/{len(bases)}) {base}{raster_ext} + .webp <- {os.path.relpath(src, REPO)}")

    print("[3/3] verifying originals untouched ...")
    after_pub = manifest(PUB)
    after_orig = manifest(ORIG)

    # photos-watermarked lives OUTSIDE PUB, so PUB manifest must be unchanged.
    # ORIG is checked the same way, including files that appeared during the run.
    drift = _manifest_drift("PUB", before_pub, after_pub)
    drift += _manifest_drift("ORIG", before_orig, after_orig)

    if drift:
        print("!! INTEGRITY FAILURE — originals were modified:")
        for d in drift:
            print(" ", d)
        sys.exit(1)
    print(f"OK — {len(before_pub)} photos/ files + {len(before_orig)} originals/ files byte-identical.")


if __name__ == "__main__":
    main()