Some checks are pending
CI / verify (push) Waiting to run
Validated OSS (Qwen3.6-27B-AEON-Uncensored) Quinn-voice drafting against the agent-matcher reply-queue baseline. Four methodology fixes eliminate the early weaknesses: json_schema strict (0% malformed), canon few-shot (100% on-voice), current-facts/location-from-context (0 location errors), and classify-move-first then reply (matcher-level discipline on defensive moves: withhold address, redirect harvesters+crude to OF). PII stays under gitignored .data/; scripts only. Claude is the offline judge/advisor, never the runtime generator. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
61 lines
3 KiB
Python
61 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
"""Build the (pseudonymized) eval set from the agent-matcher reply-queue + chat.db.
|
|
|
|
Source of truth for the recent set = the Executor agent-matcher reply-queue
|
|
(handles + classified cat/tmpl + the matcher's drafted reply, the baseline we
|
|
score against). Full conversation context is pulled from the local Messages
|
|
chat.db. PII handling: phone-number handles are replaced with RQ_NN pseudonyms;
|
|
the pseudonym->handle map stays local (*.local.json, gitignored) and is NEVER
|
|
sent off the laptop. Conversation text is required (it's the model input) and is
|
|
written only under DATA_DIR (gitignored).
|
|
|
|
Env: REPLY_QUEUE (path to the matcher reply-queue json), DATA_DIR (default ./.data).
|
|
Writes <DATA_DIR>/eval_set.json, canon_templates.json, handle_map.local.json.
|
|
"""
|
|
import sqlite3, json, os
|
|
|
|
# Output directory for every generated artifact. DATA_DIR wins when it is set
# (even to an empty string); otherwise use a ".data" folder beside this script.
if "DATA_DIR" in os.environ:
    DATA = os.environ["DATA_DIR"]
else:
    DATA = os.path.join(os.path.dirname(__file__), ".data")
os.makedirs(DATA, exist_ok=True)

# Path to the matcher reply-queue JSON. REPLY_QUEUE overrides the dated
# default that lives under the Executor project in the user's home directory.
REPLY_QUEUE = (
    os.environ["REPLY_QUEUE"]
    if "REPLY_QUEUE" in os.environ
    else os.path.expanduser(
        "~/Documents/Claude/Projects/Executor/prospecting/reply-queue-2026-06-28.json"
    )
)

# Local Messages database that conversation context is read from.
CHATDB = os.path.expanduser("~/Library/Messages/chat.db")
|
|
|
|
def decode_attr(data):
    """Pull the text out of a Messages attributedBody blob (typedstream).

    Locates the first ``NSString`` class name, skips to the byte after the
    next 0x2b marker, reads a length prefix and decodes that many bytes as
    UTF-8 (invalid sequences are replaced rather than raising).

    Args:
        data: Raw ``attributedBody`` bytes, or ``None``/empty.

    Returns:
        The decoded text with surrounding whitespace stripped, or ``None``
        when the blob is empty, lacks the ``NSString``/0x2b markers, ends
        before the length byte, or decodes to an empty string.
    """
    if not data:
        return None
    try:
        idx = data.index(b"NSString")
        p = data.index(b"\x2b", idx) + 1  # the 0x2b value-marker after the class name
    except ValueError:
        return None
    if p >= len(data):
        # Truncated blob: the marker was the final byte, so there is no
        # length prefix to read. Treat it like any other undecodable blob.
        return None
    length = data[p]
    p += 1
    if length == 0x81:  # long form: 2-byte LE length follows
        length = int.from_bytes(data[p:p + 2], "little")
        p += 2
    elif length == 0x82:  # longer form: 4-byte LE length follows
        length = int.from_bytes(data[p:p + 4], "little")
        p += 4
    # Slicing past the end of a short blob simply yields what is available.
    return data[p:p + length].decode("utf-8", "replace").strip() or None
|
|
|
|
# Load the matcher reply-queue. Each entry in rq["queue"] supplies the handle
# ("to"), the classification ("cat"/"tmpl"), the client's last message
# ("their_last") and the matcher's drafted reply ("body").
with open(REPLY_QUEUE, encoding="utf-8") as fh:
    rq = json.load(fh)

# Save the canonical templates alongside the eval set; an empty object is
# written when the queue file has no "templates" key.
with open(os.path.join(DATA, "canon_templates.json"), "w", encoding="utf-8") as fh:
    json.dump(rq.get("templates", {}), fh)

eval_set, mapping = [], {}

# mode=ro opens chat.db read-only, so this script cannot modify Messages data.
db = sqlite3.connect(f"file:{CHATDB}?mode=ro", uri=True)
try:
    for i, item in enumerate(rq["queue"], 1):
        # pid is the pseudonym that stands in for the real handle everywhere
        # outside the local mapping file.
        handle, pid = item["to"], f"RQ_{i:02d}"
        rows = db.execute(
            "SELECT m.is_from_me,m.text,m.attributedBody FROM message m "
            "JOIN handle hd ON m.handle_id=hd.ROWID WHERE hd.id=? ORDER BY m.date ASC",
            (handle,),
        ).fetchall()

        # Prefer the plain text column; fall back to decoding attributedBody
        # when text is missing or blank.
        turns = [
            {"who": "quinn" if is_me else "client",
             "text": (text or "").strip() or decode_attr(ab)}
            for is_me, text, ab in rows
        ]
        turns = [t for t in turns if t["text"]]

        while turns and turns[-1]["who"] == "quinn":  # end on the client's turn
            turns.pop()

        # Nothing usable left (no rows, no decodable text, or only outgoing
        # messages): fall back to the queue's record of the client's last
        # message so the context is never empty.
        if not turns:
            turns = [{"who": "client", "text": item["their_last"]}]

        # Keep only the 14 most recent turns as context.
        ctx = "\n".join(f"{t['who'].upper()}: {t['text']}" for t in turns[-14:])
        eval_set.append({"id": pid, "context": ctx, "their_last": item["their_last"],
                         "cat": item["cat"], "tmpl": item["tmpl"], "matcher_reply": item["body"]})
        mapping[pid] = handle
finally:
    db.close()

# ensure_ascii=False writes non-ASCII text verbatim, so the encoding is pinned
# to UTF-8 instead of relying on the platform default.
with open(os.path.join(DATA, "eval_set.json"), "w", encoding="utf-8") as fh:
    json.dump(eval_set, fh, ensure_ascii=False)

# Pseudonym -> real handle map.
with open(os.path.join(DATA, "handle_map.local.json"), "w", encoding="utf-8") as fh:
    json.dump(mapping, fh)

print(f"eval set: {len(eval_set)} convos -> {DATA}/eval_set.json (pseudonymized; map kept local)")
|