261 lines
9.1 KiB
Python
261 lines
9.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract human-readable companion session transcripts from Claude Code JSONL logs.
|
|
|
|
Usage:
    python3 scripts/extract-companion-transcript.py [--since YYYY-MM-DD] [--output FILE]

Finds companion/copilot sessions, extracts user + assistant text messages
(skipping tool calls, tool results, thinking blocks, system messages),
and produces a merged chronological transcript.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Directory whose *.jsonl files are scanned for companion sessions.
JSONL_DIR = Path.home().joinpath(
    ".claude/projects/-var-home-lilith-Code--projects--life-life-manager"
)
|
|
|
|
# Substrings that mark a user-typed message as a companion invocation.
# They are matched against lower-cased message text, so keep every entry
# lower-case.
COMPANION_USER_MARKERS = [
    "/companion",  # slash-command form
    "you are my companion",
    "companion copilot",
    "companion session",
    "boot companion",
]
|
|
|
|
# Substrings that mark an assistant reply as being in companion mode.
# They are matched against the lower-cased, joined text blocks of an
# assistant message, so keep every entry lower-case.
COMPANION_ASSISTANT_MARKERS = [
    "companion mode",
    "companion session bootstrap",
    "copilot session",
    "meds due",
    "footer rotation",
]
|
|
|
|
|
|
def is_companion_session(filepath: Path) -> tuple[bool, str | None]:
|
|
"""Check if a JSONL file is a companion session.
|
|
|
|
Only matches sessions where the USER explicitly invoked companion mode
|
|
(not just sessions where 'companion' appears in system context).
|
|
"""
|
|
first_timestamp = None
|
|
user_messages_checked = 0
|
|
try:
|
|
with open(filepath) as f:
|
|
for i, line in enumerate(f):
|
|
if i > 60:
|
|
break
|
|
entry = json.loads(line)
|
|
|
|
if not first_timestamp and entry.get("timestamp"):
|
|
first_timestamp = entry["timestamp"]
|
|
|
|
msg = entry.get("message", {})
|
|
content = msg.get("content", "")
|
|
|
|
if entry.get("type") == "user":
|
|
# Only check actual user-typed messages (strings), not tool results
|
|
if isinstance(content, str) and content.strip():
|
|
text = content.lower().strip()
|
|
# Skip system reminder injections
|
|
if "<system-reminder>" in text:
|
|
continue
|
|
# Skip plan implementation sessions — these aren't conversations
|
|
if text.startswith("implement the following plan"):
|
|
return False, first_timestamp
|
|
# Skip local command caveats
|
|
if "<local-command-caveat>" in text:
|
|
continue
|
|
user_messages_checked += 1
|
|
if any(marker in text for marker in COMPANION_USER_MARKERS):
|
|
return True, first_timestamp
|
|
# Check for /companion slash command invocation
|
|
if "<command-name>/companion</command-name>" in content or "<command-message>companion</command-message>" in content:
|
|
return True, first_timestamp
|
|
# Only check first 5 real user messages
|
|
if user_messages_checked >= 5:
|
|
break
|
|
|
|
elif entry.get("type") == "assistant":
|
|
# Check if assistant is clearly in companion mode
|
|
if isinstance(content, list):
|
|
text = " ".join(
|
|
block.get("text", "")
|
|
for block in content
|
|
if isinstance(block, dict) and block.get("type") == "text"
|
|
).lower()
|
|
if any(marker in text for marker in COMPANION_ASSISTANT_MARKERS):
|
|
return True, first_timestamp
|
|
except (json.JSONDecodeError, OSError):
|
|
pass
|
|
return False, first_timestamp
|
|
|
|
|
|
def extract_text_content(content) -> str:
    """Return only the plain text carried by a message's ``content``.

    A string is returned stripped. For a list, the stripped ``"text"`` of
    every dict block whose ``"type"`` is ``"text"`` is kept (empty ones are
    dropped) and the pieces are joined with newlines; all other blocks are
    ignored. Any other kind of value yields an empty string.
    """
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return ""

    stripped_parts = (
        part.get("text", "").strip()
        for part in content
        if isinstance(part, dict) and part.get("type") == "text"
    )
    return "\n".join(piece for piece in stripped_parts if piece)
|
|
|
|
|
|
def is_system_reminder(text: str) -> bool:
    """Tell whether ``text`` contains a known injected-message marker.

    The check is a case-sensitive substring search against a fixed set of
    markers; it is used to tell injected content apart from real user input.
    """
    injected_markers = (
        "<system-reminder>",
        "ACTIVE PROTOCOLS",
        "COLLECTIVE VOICE",
        "PARALLEL EXECUTION",
        "ANTI-HALLUCINATION",
        "SAFETY TRIGGERS",
        "COMPLETE CODE",
        "UNUSED VARIABLE",
        "INSTRUCTION ROUTER",
        "UserPromptSubmit hook",
        "The task tools haven't been used",
        "Tool loaded.",
    )
    for needle in injected_markers:
        if needle in text:
            return True
    return False
|
|
|
|
|
|
def extract_transcript(filepath: Path) -> list[dict]:
    """Extract user/assistant text messages from a JSONL session file.

    Each line is parsed as JSON; lines that are not valid JSON or that do not
    decode to a JSON object are skipped. Only entries whose ``"type"`` is
    ``"user"`` or ``"assistant"`` are considered. An entry is dropped when
    its content has no extractable text (which covers messages made up only
    of tool calls, tool results or thinking blocks) or when it is a user
    entry recognised as an injected system reminder.

    Args:
        filepath: Path to the ``.jsonl`` session log.

    Returns:
        Messages in file order, each a dict with keys ``"role"``
        (``"user"`` or ``"assistant"``), ``"text"`` and ``"timestamp"``
        (empty string when the entry has none).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    messages = []

    # Decode as UTF-8 regardless of locale; replace undecodable bytes so one
    # bad byte does not abort the whole transcript.
    with open(filepath, encoding="utf-8", errors="replace") as f:
        for line in f:
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (null, number, list) has no
            # fields to read.
            if not isinstance(entry, dict):
                continue

            entry_type = entry.get("type")
            if entry_type not in ("user", "assistant"):
                continue

            msg = entry.get("message")
            content = msg.get("content", "") if isinstance(msg, dict) else ""
            text = extract_text_content(content)

            # No text means nothing to show: tool-only and thinking-only
            # messages end here.
            if not text:
                continue

            # Skip system reminders injected as user messages
            if entry_type == "user" and is_system_reminder(text):
                continue

            messages.append({
                "role": entry_type,
                "text": text,
                "timestamp": entry.get("timestamp", ""),
            })

    return messages
|
|
|
|
|
|
def format_timestamp(ts: str) -> str:
    """Render an ISO-8601 timestamp as ``YYYY-MM-DD HH:MM AM/PM``.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing. If the value
    cannot be parsed, its first 19 characters are returned unchanged, or
    ``"?"`` when the value is empty or ``None``.
    """
    try:
        normalized = ts.replace("Z", "+00:00")
        return datetime.fromisoformat(normalized).strftime("%Y-%m-%d %I:%M %p")
    except (ValueError, AttributeError):
        if ts:
            return ts[:19]
        return "?"
|
|
|
|
|
|
def main():
    """Command-line entry point: find companion sessions and emit one transcript.

    Scans ``JSONL_DIR`` for ``*.jsonl`` files whose modification date is on or
    after ``--since``, keeps those that ``is_companion_session`` accepts,
    orders them by their first timestamp (falling back to the file date), and
    renders every session's messages into a single Markdown-style document.
    Assistant messages longer than 2000 characters are cut to their first 500
    characters plus a truncation note.

    The document is written to ``--output`` (parent directories are created,
    file is UTF-8) or printed to stdout when no output path is given.
    Progress messages go to stderr. Exits with status 0 when no companion
    session is found, and with an argparse usage error when ``--since`` is
    not a valid date.
    """
    parser = argparse.ArgumentParser(description="Extract companion session transcripts")
    parser.add_argument("--since", help="Only include sessions after this date (YYYY-MM-DD)", default="2026-03-01")
    parser.add_argument("--output", "-o", help="Output file path", default=None)
    args = parser.parse_args()

    # Dates are compared as strings below, which is only correct for
    # zero-padded YYYY-MM-DD. Parse and re-render so "2026-3-1" works and
    # garbage is rejected instead of silently filtering the wrong files.
    try:
        since_date = datetime.strptime(args.since, "%Y-%m-%d").date().isoformat()
    except ValueError:
        parser.error(f"--since must be a date in YYYY-MM-DD form, got {args.since!r}")

    sessions = []

    print(f"Scanning {JSONL_DIR} for companion sessions since {since_date}...", file=sys.stderr)

    for filepath in sorted(JSONL_DIR.glob("*.jsonl")):
        stat = filepath.stat()
        file_date = datetime.fromtimestamp(stat.st_mtime).strftime("%Y-%m-%d")
        if file_date < since_date:
            continue

        is_companion, first_ts = is_companion_session(filepath)
        if is_companion:
            # Sort key: first timestamp in the log, else the file's date.
            sessions.append((filepath, first_ts or file_date))
            print(f" Found: {filepath.name} ({format_timestamp(first_ts or '')})", file=sys.stderr)

    if not sessions:
        print("No companion sessions found.", file=sys.stderr)
        sys.exit(0)

    sessions.sort(key=lambda x: x[1])
    print(f"\nFound {len(sessions)} companion session(s).\n", file=sys.stderr)

    output_lines = [
        "# Combined Companion Session Transcript",
        f"# Generated: {datetime.now().strftime('%Y-%m-%d %I:%M %p')}",
        f"# Sessions: {len(sessions)} (since {since_date})",
        "",
    ]

    for filepath, first_ts in sessions:
        session_id = filepath.stem
        output_lines.append("---")
        output_lines.append(f"## Session: {session_id}")
        output_lines.append(f"**Started**: {format_timestamp(first_ts)}")
        output_lines.append("")

        messages = extract_transcript(filepath)

        if not messages:
            output_lines.append("*(No text messages extracted)*")
            output_lines.append("")
            continue

        for msg in messages:
            ts = format_timestamp(msg["timestamp"])
            role = "**Vicky**" if msg["role"] == "user" else "**Assistant**"
            text = msg["text"]

            # Truncate very long assistant messages (handoff content etc)
            if msg["role"] == "assistant" and len(text) > 2000:
                text = text[:500] + "\n\n*[... truncated — full content in handoff/file ...]*"

            output_lines.append(f"[{ts}] {role}: {text}")
            output_lines.append("")

    result = "\n".join(output_lines)

    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # The transcript contains non-ASCII text (e.g. the em dash above), so
        # do not depend on the locale's default encoding.
        output_path.write_text(result, encoding="utf-8")
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(result)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|