feat(mr-number): OCR extraction mode + lookup script + installer
mr_lookup.py: lazy vision-SDK import + --extract ocr (tesseract, no SDK) so the tool runs on the redroid cloud box. extract_via_ocr parses both the 'Recent reports' summary and 'User reports' detail layouts → reports/red_flags/verdict, with escort-relevant negative keywords (ghosted, pressured, time waster, FT, lowball, flake). lookup.sh: one-command cloud preview (SSH → drive app + OCR on the box → print verdict). install.sh: idempotent setup of droplet (tesseract+tool+services), plum (deps+MCP SDK+mrlookup on PATH), and the Claude Desktop MCP (run while Desktop quit so it sticks past the clobber). Verified end-to-end: real lookup → denied + red flags. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d95b0669be
commit
3be170946e
3 changed files with 262 additions and 19 deletions
95
users/transquinnftw/tools/mr-number-lookup/install.sh
Executable file
95
users/transquinnftw/tools/mr-number-lookup/install.sh
Executable file
|
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/env bash
#
# install.sh — set up the Mr. Number lookup feature end to end.
#
# Idempotent. Run from plum. RUN IT WHILE CLAUDE DESKTOP IS QUIT — the desktop
# config is rewritten on quit, so adding the MCP while Desktop runs gets clobbered;
# adding it while quit makes it stick.
#
#   1. DROPLET (redroid box): tesseract + the tool + the OCR/keyboard services.
#   2. PLUM: local deps (tesseract/adb), the MCP SDK, and the `mrlookup` command.
#   3. CLAUDE DESKTOP: register the quinn-mr-number MCP (durably, while quit).
#
set -euo pipefail

# Overridable via MR_NUMBER_HOST / MR_NUMBER_KEY; everything else is fixed layout.
HOST="${MR_NUMBER_HOST:-root@45.55.191.82}"
KEY="${MR_NUMBER_KEY:-$HOME/.ssh/id_ed25519_1984}"
TOOL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REMOTE_DIR="/opt/mr-number"
DESKTOP_CFG="$HOME/Library/Application Support/Claude/claude_desktop_config.json"
# Falls back to bun's default install location when bun is not on PATH.
BUN="$(command -v bun || echo "$HOME/.bun/bin/bun")"

# Console helpers: section header, success line, warning line.
say()  { printf '\n\033[1;36m== %s\033[0m\n' "$*"; }
ok()   { printf ' \033[32m✓\033[0m %s\n' "$*"; }
warn() { printf ' \033[33m!\033[0m %s\n' "$*"; }

# Run a command on the droplet / copy one local file ($1) to a remote path ($2).
ssh_box() { ssh -i "$KEY" -o ConnectTimeout=15 "$HOST" "$@"; }
scp_box() { scp -i "$KEY" -o ConnectTimeout=15 "$1" "$HOST:$2"; }

# ensure_brew_pkg <command> <formula>: install <formula> only if <command> is
# missing. Written as an explicit if/else instead of an `a || b && c || d` chain
# so the success/warn branches cannot be misread.
ensure_brew_pkg() {
  if command -v "$1" >/dev/null || brew install "$2" >/dev/null; then
    ok "$1"
  else
    warn "$1 install skipped"
  fi
}

# ── 1. DROPLET ──────────────────────────────────────────────────────────────
say "1/3 Droplet ($HOST)"
if ! ssh_box 'echo ok' >/dev/null 2>&1; then
  warn "cannot reach $HOST with $KEY — skipping droplet setup"
else
  ssh_box 'command -v tesseract >/dev/null || { export DEBIAN_FRONTEND=noninteractive; apt-get update -qq && apt-get install -y -qq tesseract-ocr; }'
  ok "tesseract present: $(ssh_box 'tesseract --version 2>&1 | head -1')"

  ssh_box "mkdir -p $REMOTE_DIR /opt/mrnumber-ocr /opt/adb-web"
  scp_box "$TOOL_DIR/mr_lookup.py" "$REMOTE_DIR/mr_lookup.py"
  scp_box "$TOOL_DIR/cloud-setup/ocr-service/server.py" "/opt/mrnumber-ocr/server.py"
  scp_box "$TOOL_DIR/cloud-setup/ocr-service/mrnumber-ocr.service" "/etc/systemd/system/mrnumber-ocr.service"
  scp_box "$TOOL_DIR/cloud-setup/adb-keyboard/server.py" "/opt/adb-web/server.py"
  ok "tool + services copied to droplet"

  # Report what actually happened instead of swallowing failures and then
  # printing an unconditional "services enabled".
  if ssh_box 'systemctl daemon-reload && systemctl enable --now mrnumber-ocr >/dev/null 2>&1'; then
    ok "mrnumber-ocr enabled (:8003)"
  else
    warn "could not enable mrnumber-ocr — check 'journalctl -u mrnumber-ocr' on the droplet"
  fi
  # No adb-web unit file is copied by this script, so the restart is best-effort.
  if ssh_box 'systemctl restart adb-web 2>/dev/null'; then
    ok "adb-web restarted (:8001)"
  else
    warn "adb-web restart failed — is its unit installed on the droplet?"
  fi
  # `systemctl is-active` exits non-zero for an inactive unit; without `|| true`
  # that status propagates through ssh and `set -e` would abort the installer
  # before the local (plum / Desktop) steps run.
  ssh_box 'for s in mrnumber-ocr adb-web; do printf " %s: " "$s"; systemctl is-active "$s" || true; done' || true
fi

# ── 2. PLUM ─────────────────────────────────────────────────────────────────
say "2/3 Plum (local)"
if command -v brew >/dev/null; then
  ensure_brew_pkg tesseract tesseract
  ensure_brew_pkg adb android-platform-tools
else
  warn "no brew — ensure tesseract + adb are installed for the MCP path"
fi

# MCP SDK (public npm; bun auto-resolves, but materialize for Desktop's minimal PATH)
if [ -x "$BUN" ]; then
  if ( cd "$TOOL_DIR/mcp" && "$BUN" install >/dev/null 2>&1 ); then
    ok "MCP SDK installed (mcp/)"
  else
    warn "bun install failed in mcp/"
  fi
else
  warn "bun not found — the quinn-mr-number MCP needs it on PATH"
fi

# `mrlookup` on PATH: symlink into the first writable candidate directory.
chmod +x "$TOOL_DIR/lookup.sh"
LINKED=""
for BINDIR in "$HOME/Code/@scripts/session-tools" "$HOME/.local/bin" "/usr/local/bin"; do
  if [ -d "$BINDIR" ] && [ -w "$BINDIR" ] && ln -sf "$TOOL_DIR/lookup.sh" "$BINDIR/mrlookup"; then
    ok "mrlookup → $BINDIR/mrlookup"
    LINKED=1
    break
  fi
done
[ -n "$LINKED" ] || warn "no writable bin dir found — run $TOOL_DIR/lookup.sh directly"

# ── 3. CLAUDE DESKTOP MCP (run while Desktop is QUIT) ───────────────────────
say "3/3 Claude Desktop MCP"
if pgrep -f "Claude.app/Contents/MacOS/Claude" >/dev/null 2>&1; then
  warn "Claude Desktop is RUNNING — quit it and re-run, or this entry will be clobbered on next quit."
fi
INDEX="$TOOL_DIR/mcp/index.ts"
python3 - "$DESKTOP_CFG" "$INDEX" "$BUN" <<'PY'
import json
import os
import shutil
import sys
import tempfile

cfg, index, bun = sys.argv[1], sys.argv[2], sys.argv[3]
if not os.path.exists(cfg):
    print(" ! desktop config not found:", cfg)
    sys.exit(0)

# Resolve symlinks so the atomic replace below swaps the real file.
cfg = os.path.realpath(cfg)
try:
    with open(cfg, encoding="utf-8") as fh:
        d = json.load(fh)
except ValueError as exc:
    print(" ! desktop config is not valid JSON — left untouched:", exc)
    sys.exit(1)
if not isinstance(d, dict):
    print(" ! desktop config is not a JSON object — left untouched:", cfg)
    sys.exit(1)

servers = d.setdefault("mcpServers", {})
servers["quinn-mr-number"] = {"command": bun, "args": ["run", index]}

# Write to a sibling temp file and rename over the original, so an interrupted
# run can never leave a truncated config behind.
fd, tmp = tempfile.mkstemp(dir=os.path.dirname(cfg), suffix=".tmp")
try:
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        json.dump(d, fh, indent=2)
        fh.write("\n")
    shutil.copymode(cfg, tmp)
    os.replace(tmp, cfg)
except Exception:
    if os.path.exists(tmp):
        os.unlink(tmp)
    raise
print(" \033[32m✓\033[0m quinn-mr-number registered (servers:", ", ".join(servers) + ")")
PY

say "Done."
echo " Quick preview: mrlookup +15551234567"
echo " Record a verdict: use the quinn-mr-number MCP (restart Claude Desktop first)."
|
||||
55
users/transquinnftw/tools/mr-number-lookup/lookup.sh
Executable file
55
users/transquinnftw/tools/mr-number-lookup/lookup.sh
Executable file
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env bash
#
# lookup — screen a phone number against Mr. Number via the cloud redroid box.
#
# Runs the lookup ENTIRELY on the DO droplet (45.55.191.82): adb drives the app,
# tesseract OCRs the reports — no vision SDK, no tunnel, one SSH round-trip. Prints
# the community reports + a suggested screening verdict.
#
# Preview by default (does NOT record). Recording into the screening gate goes
# through the quinn-mr-number MCP (token stays on plum) — see README.
#
#   lookup +15551234567
#   lookup "+1 555 123 4567"   # any format; cleaned before input
#
set -euo pipefail

HOST="${MR_NUMBER_HOST:-root@45.55.191.82}"
KEY="${MR_NUMBER_KEY:-$HOME/.ssh/id_ed25519_1984}"
REMOTE_DIR="${MR_NUMBER_REMOTE_DIR:-/opt/mr-number}"
SERIAL="${MR_NUMBER_REDROID_SERIAL:-localhost:5555}"

PHONE="${1:-}"
if [ -z "$PHONE" ]; then
  echo "usage: lookup <phone>   e.g. lookup +15551234567" >&2
  exit 1
fi

# Build the remote command with %q so every value reaches the box as exactly one
# shell word. Pasting "$PHONE" between single quotes would let an argument that
# contains a quote run arbitrary commands as root on the droplet.
# NOTE: %q may emit bash-only $'..' quoting for control characters — assumes the
# remote login shell is bash.
printf -v REMOTE_CMD \
  'cd %q && python3 mr_lookup.py --json --extract ocr --dry-run --device %q --phone %q' \
  "$REMOTE_DIR" "$SERIAL" "$PHONE"

# Single SSH round-trip: drive the app + OCR on the box, emit one JSON object.
# Capture the exit status rather than letting `set -e` abort here, so a JSON
# error object printed by a failing run (if the tool emits one) is still shown.
RC=0
JSON="$(ssh -i "$KEY" -o ConnectTimeout=15 "$HOST" "$REMOTE_CMD")" || RC=$?
if [ -z "$JSON" ]; then
  echo "lookup: no output from $HOST (ssh exit $RC)" >&2
  [ "$RC" -ne 0 ] || RC=1
  exit "$RC"
fi

# Pretty-print the result for a human (comma-style prints — no single quotes, so
# the program stays valid inside the bash single-quoted -c string).
printf '%s' "$JSON" | python3 -c '
import json
import sys

try:
    d = json.load(sys.stdin)
except ValueError as exc:
    print("ERROR: the box did not return valid JSON:", exc, file=sys.stderr)
    sys.exit(1)
if d.get("error"):
    print("ERROR:", d.get("message") or d["error"])
    sys.exit(1)
ex = d.get("extracted") or {}
print()
print(" phone ", d.get("phone"))
print(" verdict ", d.get("result"), " (suggested:", ex.get("suggested_result"), ")")
if ex.get("classification"):
    print(" caller ", ex.get("classification"))
if ex.get("report_count") is not None:
    print(" reports ", ex.get("report_count"))
flags = ex.get("red_flags") or []
if flags:
    print(" red flags ", "; ".join(flags))
reps = ex.get("reports") or []
if reps:
    print(" recent:")
    for r in reps[:6]:
        print(" -", r)
print()
print(" screenshot ", d.get("screenshot"), "(on the box)")
print(" (preview only - not recorded; use the quinn-mr-number MCP to record)")
print()
'

# Surface a remote failure even when the JSON itself carried no error field.
exit "$RC"
|
||||
|
|
@ -35,19 +35,24 @@ from typing import Any
|
|||
# requests is only needed for the final recording step (guarded import so unit tests
|
||||
# can run in environments without it; the emulator path itself is fully testable).
|
||||
|
||||
# --- Vision SDK (exact same pattern as codebase/@features/ad-watch/scripts/classify_photos.py)
|
||||
_SDK_SRC = os.environ.get(
|
||||
"CLAUDE_CODE_BATCH_SDK_PATH",
|
||||
str(Path.home() / "Code/@applications/@ml/@packages/@py/claude-code-batch-sdk/src"),
|
||||
)
|
||||
if _SDK_SRC not in sys.path:
|
||||
sys.path.insert(0, _SDK_SRC)
|
||||
|
||||
try:
|
||||
from claude_code_batch_sdk import ClaudeClient, parse_json_response # noqa: E402
|
||||
except ImportError:
|
||||
print("ERROR: claude-code-batch-sdk not found. Set CLAUDE_CODE_BATCH_SDK_PATH or clone it to the expected location.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
# --- Vision SDK loaded lazily (only the --extract vision path needs it). The OCR
|
||||
# path (--extract ocr) runs with stdlib + tesseract only, so the tool works on the
|
||||
# redroid cloud box where the batch SDK is absent.
|
||||
def _load_vision_sdk():
|
||||
"""Import claude-code-batch-sdk on demand; exit clearly if it's missing."""
|
||||
sdk_src = os.environ.get(
|
||||
"CLAUDE_CODE_BATCH_SDK_PATH",
|
||||
str(Path.home() / "Code/@applications/@ml/@packages/@py/claude-code-batch-sdk/src"),
|
||||
)
|
||||
if sdk_src not in sys.path:
|
||||
sys.path.insert(0, sdk_src)
|
||||
try:
|
||||
from claude_code_batch_sdk import ClaudeClient, parse_json_response # noqa: E402
|
||||
except ImportError:
|
||||
print("ERROR: claude-code-batch-sdk not found (needed for --extract vision). "
|
||||
"Set CLAUDE_CODE_BATCH_SDK_PATH, or use --extract ocr.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
return ClaudeClient, parse_json_response
|
||||
|
||||
# --- Config / env
|
||||
QUINN_MY_URL = (os.environ.get("QUINN_MY_URL") or "https://my.transquinnftw.com").rstrip("/")
|
||||
|
|
@ -90,6 +95,7 @@ def _build_vision_prompt(screenshot_path: str, phone: str) -> str:
|
|||
)
|
||||
|
||||
async def _extract_from_screenshot(screenshot_path: str, phone: str) -> dict[str, Any]:
|
||||
ClaudeClient, parse_json_response = _load_vision_sdk()
|
||||
client = ClaudeClient(model="haiku", max_concurrent=1) # haiku is fast and sufficient for text extraction
|
||||
prompt = _build_vision_prompt(str(screenshot_path), phone)
|
||||
|
||||
|
|
@ -108,6 +114,86 @@ async def _extract_from_screenshot(screenshot_path: str, phone: str) -> dict[str
|
|||
|
||||
return parsed
|
||||
|
||||
# --- OCR extraction (no vision SDK) — for the redroid cloud box where tesseract is
# present but the batch SDK is not. Produces the SAME dict shape as the vision path.
_OCR_NEG_KW = ("no show", "noshow", "no-show", "rude", "cop", "police", "scam", "spam",
               "fraud", "timewaste", "time waster", "time-waster", "ghost", "pressured",
               "harass", "boundary", "unsafe", "aggressive", "fake", "flake", "lowball",
               "haggle", "facetime", "wants to ft", "creep", "rob", "danger")
# Keywords are matched at the START of a word, so stems still hit their inflections
# ("ghost" -> "ghosted", "rob" -> "robbed") but no longer fire in the middle of an
# unrelated word ("rob" in "problem", "cop" in "scope").
_OCR_NEG_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(k) for k in _OCR_NEG_KW) + r")", re.I)
_OCR_DATE_RE = re.compile(
    r"^[a-z]{0,2}\s*(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\.?\s+\d", re.I)
# Header that begins the reports list — the summary screen says "Recent reports",
# the detail screen says "User reports", some builds "Community reports".
_OCR_REPORTS_HEADER = re.compile(r"\b(recent|user|community)\s+reports\b", re.I)
# UI labels (buttons, links, headings) that can show up inside the reports area.
_OCR_SKIP = ("report caller", "recent reports", "user reports", "community reports",
             "view all", "look up", "search", "block", "unblock", "add contact",
             "call", "message", "text", "report number", "is this you")
# Longest label first so "report caller" is consumed before the bare "call".
_OCR_SKIP_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(s) for s in sorted(_OCR_SKIP, key=len, reverse=True)) + r")\b",
    re.I)
_OCR_LEADING_NOISE_RE = re.compile(r"^[^a-zA-Z0-9]+")


def _tesseract(image_path: str, psm: int = 6) -> str:
    """OCR an image with tesseract and return the recognised text.

    Args:
        image_path: Path of the image handed to tesseract.
        psm: Tesseract page-segmentation mode, passed through as ``--psm``.

    Returns:
        Whatever tesseract wrote to stdout.

    Raises:
        RuntimeError: tesseract exited with a non-zero status.

    The binary is taken from ``TESSERACT_BIN`` (default ``tesseract``); the call is
    capped at 60 seconds.
    """
    binary = os.environ.get("TESSERACT_BIN", "tesseract")
    proc = subprocess.run([binary, image_path, "stdout", "--psm", str(psm)],
                          capture_output=True, text=True, timeout=60)
    if proc.returncode != 0:
        raise RuntimeError(f"tesseract failed (rc={proc.returncode}): {proc.stderr[:300]}")
    return proc.stdout


def _is_ui_chrome(low: str) -> bool:
    """Return True when a lower-cased OCR line is made up of UI labels only.

    A line counts as chrome if at least one ``_OCR_SKIP`` label occurs in it and
    nothing word-like (two or more alphanumerics) is left once the labels are
    removed. A row of buttons ("Call Message Block") is therefore dropped, while a
    genuine report that merely contains such a word ("Scam call about ...") is kept.
    """
    residue = _OCR_SKIP_RE.sub(" ", low)
    return residue != low and not re.search(r"[a-z0-9]{2,}", residue)


def extract_via_ocr(image_path: str, phone: str) -> dict[str, Any]:
    """Parse the Mr. Number reports screen from OCR text into the extract dict.

    Args:
        image_path: Screenshot to OCR.
        phone: Phone number, copied verbatim into the result.

    Returns:
        A dict with ``phone``, ``report_count`` (from the "View all N reports"
        link, else ``None``), ``reports`` (cleaned report lines), ``classification``
        (first line mentioning spam/scam/telemarket/fraud, else ``None``),
        ``red_flags``, ``summary``, ``suggested_result`` ("denied" when there are
        red flags, "approved" when there are reports or a count, otherwise
        "not_found"), ``extractor`` ("ocr") and the raw ``ocr_text``.

    Raises:
        RuntimeError: propagated from ``_tesseract`` when OCR fails.
    """
    text = _tesseract(image_path)
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

    count = None
    classification = None
    reports: list[str] = []
    capturing = False
    for ln in lines:
        low = ln.lower()
        m = re.search(r"view all (\d+)\s+report", low)
        if m:
            count = int(m.group(1))
        if classification is None and re.search(r"spam|scam|telemarket|fraud", low):
            classification = ln
        if _OCR_REPORTS_HEADER.search(low):
            capturing = True
            continue
        if "view all" in low:  # summary screen: reports end at the "View all N" link
            capturing = False
            continue
        if not capturing:
            continue
        # Drop UI chrome and date stamps; report lines that merely CONTAIN a label
        # word (e.g. "call") must survive, they are often the red flags.
        if _is_ui_chrome(low) or _OCR_DATE_RE.match(ln):
            continue
        # Drop short OCR noise (icon glyphs come through as 1-2 chars like "q", "CQ)").
        cleaned = _OCR_LEADING_NOISE_RE.sub("", ln).strip()
        if len(cleaned) >= 4:
            reports.append(cleaned)

    red_flags = [r for r in reports if _OCR_NEG_RE.search(r)]
    if classification and re.search(r"spam|scam|fraud", classification.lower()):
        # The classification line may itself be a captured report; don't list it twice.
        if _OCR_LEADING_NOISE_RE.sub("", classification).strip() not in red_flags:
            red_flags.append(classification)
    summary = "; ".join(reports[:3]) or (classification or "no reports parsed")
    if red_flags:
        suggested = "denied"
    elif count or reports:
        suggested = "approved"
    else:
        suggested = "not_found"
    return {
        "phone": phone,
        "report_count": count,
        "reports": reports,
        "classification": classification,
        "red_flags": red_flags,
        "summary": summary,
        "suggested_result": suggested,
        "extractor": "ocr",
        "ocr_text": text,
    }
|
||||
|
||||
|
||||
# --- adb helpers (refactored into class for testability of the emulator method)
|
||||
class MrNumberEmulator:
|
||||
"""Encapsulates adb interactions with the Mr. Number app in an emulator.
|
||||
|
|
@ -346,7 +432,8 @@ def record_screening(client_id: int, phone: str, result: str, raw: str) -> dict[
|
|||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
async def main_async(phone: str, client_id: int | None, dry_run: bool, dump_ui: bool = False) -> dict[str, Any]:
|
||||
async def main_async(phone: str, client_id: int | None, dry_run: bool, dump_ui: bool = False,
|
||||
extract: str = "vision") -> dict[str, Any]:
|
||||
log(f"[mr-number] Starting lookup for {phone} on {DEVICE} (client_id={client_id}, dry_run={dry_run})")
|
||||
|
||||
input_phone = clean_phone(phone)
|
||||
|
|
@ -394,10 +481,14 @@ async def main_async(phone: str, client_id: int | None, dry_run: bool, dump_ui:
|
|||
shot = take_screenshot(input_phone)
|
||||
log(f"[mr-number] Screenshot saved to {shot}")
|
||||
|
||||
# 3. Vision extraction
|
||||
log("[mr-number] Running vision extraction on screenshot...")
|
||||
extracted = await _extract_from_screenshot(str(shot), phone)
|
||||
log("[mr-number] Extraction:", json.dumps(extracted, indent=2)[:800])
|
||||
# 3. Extraction — OCR (tesseract, no SDK) or vision (claude batch SDK)
|
||||
if extract == "ocr":
|
||||
log("[mr-number] Running OCR extraction (tesseract) on screenshot...")
|
||||
extracted = extract_via_ocr(str(shot), phone)
|
||||
else:
|
||||
log("[mr-number] Running vision extraction on screenshot...")
|
||||
extracted = await _extract_from_screenshot(str(shot), phone)
|
||||
log("[mr-number] Extraction:", json.dumps({k: v for k, v in extracted.items() if k != 'ocr_text'}, indent=2)[:800])
|
||||
|
||||
# 4. Decide result + build raw
|
||||
result = decide_result(extracted)
|
||||
|
|
@ -451,6 +542,8 @@ def main() -> None:
|
|||
parser.add_argument("--device", default=DEVICE, help="adb serial (default emulator-5554)")
|
||||
parser.add_argument("--dump-ui", action="store_true", help="Always dump the current UI hierarchy before actions (for calibration/troubleshooting)")
|
||||
parser.add_argument("--json", action="store_true", help="Emit one JSON result object on stdout (progress to stderr). Used by the mr-number MCP.")
|
||||
parser.add_argument("--extract", choices=["vision", "ocr"], default="vision",
|
||||
help="Extraction backend: 'vision' (claude batch SDK) or 'ocr' (tesseract, no SDK — for the redroid cloud box).")
|
||||
args = parser.parse_args()
|
||||
|
||||
DEVICE = args.device
|
||||
|
|
@ -471,7 +564,7 @@ def main() -> None:
|
|||
log(get_ui_dump()[:2000] + "\n... (truncated)")
|
||||
|
||||
import asyncio
|
||||
result = asyncio.run(main_async(args.phone, args.client_id, args.dry_run, args.dump_ui))
|
||||
result = asyncio.run(main_async(args.phone, args.client_id, args.dry_run, args.dump_ui, args.extract))
|
||||
if JSON_MODE:
|
||||
print(json.dumps(result))
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue