diff --git a/client/mr_lookup.py b/client/mr_lookup.py index 239c110..aea4be5 100755 --- a/client/mr_lookup.py +++ b/client/mr_lookup.py @@ -2,19 +2,22 @@ """ mr-number-lookup -Drive an Android emulator running the Mr. Number app (com.mrnumber.blocker), -perform a phone lookup, screenshot the results, extract reports/comments via -the project's claude-code-batch-sdk vision (same pattern as ad-watch classify_photos.py), -decide a screening result, and record it through the existing mr-number screening -service (so it feeds reputation events + all your client filters). +Drive an Android device (USB phone or the redroid droplet) running the Mr. Number +app (com.mrnumber.blocker), perform a phone lookup, expand + scroll-capture the +*full* community-report history, vision-extract every report, consolidate them with +the lilith claude-code-batch-sdk into a multi-axis **rating profile** (0-100 + letter +grade) for the caller, decide a screening result, save the full history, and record +it through the existing mr-number screening service (so it feeds reputation events + +all client filters). -Usage (after emulator + paid Mr. Number app is set up inside it): +Usage: python3 mr_lookup.py --phone "+15551234567" --client-id 12345 [--dry-run] Requires: -- adb in PATH, emulator running (usually emulator-5554) with the app installed + logged in (paid tier). +- adb in PATH; a device connected (USB serial, or `adb connect :5555` for redroid) + with the paid Mr. Number app installed + signed in. - QUINN_MY_URL + QUINN_MY_SERVICE_TOKEN in env (for recording). -- The claude batch SDK on disk (for vision on the screenshot). +- The claude batch SDK on disk (for vision + rating consolidation). The manual path in quinn.my (Screening tab) remains the fallback / review surface. 
""" @@ -33,7 +36,7 @@ from pathlib import Path from typing import Any # requests is only needed for the final recording step (guarded import so unit tests -# can run in environments without it; the emulator path itself is fully testable). +# can run in environments without it; the device path itself is fully testable). # --- Vision SDK (exact same pattern as codebase/@features/ad-watch/scripts/classify_photos.py) _SDK_SRC = os.environ.get( @@ -55,7 +58,15 @@ QUINN_MY_SERVICE_TOKEN = os.environ.get("QUINN_MY_SERVICE_TOKEN", "") DEVICE = os.environ.get("MR_NUMBER_DEVICE", "emulator-5554") PACKAGE = "com.mrnumber.blocker" OUTPUT_DIR = Path(__file__).parent / "output" +HISTORY_DIR = OUTPUT_DIR / "history" OUTPUT_DIR.mkdir(exist_ok=True) +HISTORY_DIR.mkdir(exist_ok=True) + +# Vision = fast/cheap text-from-image. Rating = reasoning over the consolidated +# history, so it defaults to a stronger model (override via env). +VISION_MODEL = os.environ.get("MR_NUMBER_VISION_MODEL", "haiku") +RATING_MODEL = os.environ.get("MR_NUMBER_RATING_MODEL", "sonnet") +MAX_SCROLL_CAPTURES = int(os.environ.get("MR_NUMBER_MAX_SCROLLS", "10")) # --json mode: progress goes to stderr, a single result JSON object goes to stdout. # Lets the mr-number MCP (mcp/index.ts) drive the lookup and consume a clean result. @@ -66,31 +77,37 @@ def log(*args: Any) -> None: """Progress line — stderr in --json mode so stdout stays a clean JSON object.""" print(*args, file=sys.stderr if JSON_MODE else sys.stdout) -# Vision prompt tuned for Mr. Number results screen + +# ---------------------------------------------------------------------------- +# Vision extraction (per screenshot) +# ---------------------------------------------------------------------------- MR_NUMBER_SYSTEM = ( "You are looking at a screenshot from the Mr. Number (caller ID + community reports) Android app. " "Extract the information shown for the looked-up phone number. Respond ONLY with a single JSON object, no markdown." 
) + def _build_vision_prompt(screenshot_path: str, phone: str) -> str: schema = { "phone": "the exact phone number that was searched (string)", - "report_count": "integer or null — how many user reports/comments are visible", - "reports": "array of strings — the actual report/comment text shown (the valuable paid content)", - "classification": "string or null — e.g. 'personal', 'business', 'suspected spam', or whatever the app shows at top", - "red_flags": "array of strings — any negative signals mentioned (no-show, rude, cop, timewaster, boundary issues, etc.)", - "summary": "short one-sentence overall impression from the reports", - "suggested_result": "one of: approved, denied, not_found — your best guess for a screening result based on the reports" + "report_count": "integer or null — the total number of reports the app says exist (e.g. 'View all 7 reports' -> 7), not just visible", + "reports": "array of strings — every report/comment text VISIBLE in this screenshot, verbatim (the valuable paid content)", + "classification": "string or null — the label at the top (e.g. 'Personal Line', 'Business', 'Suspected Spam')", + "red_flags": "array of strings — negative signals mentioned (no-show, ghosting, rude, cop/law-enforcement, timewaster, boundary issues, etc.)", + "summary": "short one-sentence impression from the reports visible here", + "suggested_result": "one of: approved, denied, not_found — your best guess from what's visible", } return ( f"Read the image file at: {screenshot_path}\n\n" f"This is a screenshot after looking up {phone} in the Mr. Number app.\n" - "Extract the community reports and any top-level caller info. " + "Extract the community reports and any top-level caller info VISIBLE in this image. " + "Transcribe report text verbatim — do not paraphrase. 
" f"Respond with ONLY one JSON object:\n{json.dumps(schema, indent=2)}" ) + async def _extract_from_screenshot(screenshot_path: str, phone: str) -> dict[str, Any]: - client = ClaudeClient(model="haiku", max_concurrent=1) # haiku is fast and sufficient for text extraction + client = ClaudeClient(model=VISION_MODEL, max_concurrent=1) prompt = _build_vision_prompt(str(screenshot_path), phone) resp = await client.generate( @@ -108,12 +125,173 @@ async def _extract_from_screenshot(screenshot_path: str, phone: str) -> dict[str return parsed -# --- adb helpers (refactored into class for testability of the emulator method) + +def merge_reports(extractions: list[dict[str, Any]], phone: str) -> dict[str, Any]: + """Consolidate per-screenshot extractions into one deduped report history.""" + reports: list[str] = [] + seen: set[str] = set() + red_flags: list[str] = [] + red_seen: set[str] = set() + classification: str | None = None + declared_count = 0 + + for ex in extractions: + if not isinstance(ex, dict): + continue + if not classification and ex.get("classification"): + classification = ex.get("classification") + rc = ex.get("report_count") + if isinstance(rc, int): + declared_count = max(declared_count, rc) + for r in ex.get("reports") or []: + key = re.sub(r"\s+", " ", str(r).strip().lower()) + if key and key not in seen: + seen.add(key) + reports.append(str(r).strip()) + for f in ex.get("red_flags") or []: + key = re.sub(r"\s+", " ", str(f).strip().lower()) + if key and key not in red_seen: + red_seen.add(key) + red_flags.append(str(f).strip()) + + return { + "phone": phone, + "reports": reports, + "red_flags": red_flags, + "classification": classification, + # report_count = the larger of what the app declared vs. 
how many we captured + "report_count": max(declared_count, len(reports)), + "captured_count": len(reports), + "declared_count": declared_count, + } + + +# ---------------------------------------------------------------------------- +# Rating profile (consolidation via the batch SDK) +# ---------------------------------------------------------------------------- +RATING_SYSTEM = ( + "You are a trust-and-safety analyst for an independent adult-industry provider (legal, " + "regulated). You read crowdsourced caller reports from Mr. Number and produce a structured " + "rating profile for the caller — how safe and worthwhile they are as a potential client. " + "Respond ONLY with a single JSON object, no markdown.\n\n" + "DOMAIN NUANCE — read signals like an insider, not literally:\n" + "- DEPOSITS ARE GOOD. A report mentioning the caller 'paid a deposit', 'sent a deposit', " + "'offered/asked to send a deposit', or 'always deposits' is a STRONG POSITIVE — deposit-payers " + "are serious, vetted, low-risk clients. Weight this heavily toward A/B. Only 'refused/won't pay " + "a deposit' or 'chargeback' is negative.\n" + "- 'Get a deposit' / 'make him deposit' written as advice from another provider means the caller " + "is known to follow through once a deposit is taken — treat as a manageable/positive signal, NOT a red flag.\n" + "- RELIABILITY: no-show, ghosting, flaking, cancelling last-minute → negative.\n" + "- SAFETY (critical): law enforcement / cop / sting / 'asks weird LE questions', violence, coercion, " + "robbery, attempts to remove agency → severe negative; if present, recommend denied regardless of other axes.\n" + "- RESPECT: rude, pushy, haggling, boundary-pushing → negative.\n" + "- MIXED REVIEWS: when reports conflict, do NOT average blindly — score each axis on its own evidence " + "and explain the split.\n\n" + "SCORING: 0-100 overall (higher = safer/better client). Grade A>=85, B 70-84, C 55-69, D 40-54, F<40." 
+) + + +def _build_rating_prompt(history: dict[str, Any]) -> str: + schema = { + "score": "integer 0-100 — overall safety/desirability as a client", + "grade": "one of A,B,C,D,F (A>=85, B 70-84, C 55-69, D 40-54, F<40)", + "is_mixed": "boolean — true if the reports conflict / are genuinely mixed", + "axes": { + "reliability": {"score": "0-100", "note": "shows up vs no-shows/ghosting/flaking"}, + "payment": {"score": "0-100", "note": "deposits (GOOD), pays agreed rate, no haggling/chargebacks"}, + "respect": {"score": "0-100", "note": "politeness, respects boundaries, not pushy"}, + "safety": {"score": "0-100", "note": "no law-enforcement/violence/coercion signals"}, + }, + "positive_signals": "array of strings — concrete positives found (quote/paraphrase the report)", + "negative_signals": "array of strings — concrete negatives found", + "nuanced_notes": "array of strings — where you read a signal NON-literally (e.g. deposit mentions as positive)", + "summary": "2-3 sentence consolidated profile of this caller", + "recommended_result": "one of: approved, denied, pending, not_found", + } + reports_block = "\n".join(f"- {r}" for r in history.get("reports") or []) or "(no report text captured)" + return ( + f"Caller: {history.get('phone')}\n" + f"App classification: {history.get('classification')}\n" + f"Reports the app says exist: {history.get('report_count')} " + f"(captured {history.get('captured_count')})\n\n" + f"All captured community reports:\n{reports_block}\n\n" + f"Vision-flagged terms: {', '.join(history.get('red_flags') or []) or '(none)'}\n\n" + "Produce the caller's rating profile. Apply the domain nuance from the system prompt " + "(especially: deposits are a positive signal; law-enforcement signals force denied). 
" + f"Respond with ONLY one JSON object:\n{json.dumps(schema, indent=2)}" + ) + + +async def build_rating_profile(history: dict[str, Any]) -> dict[str, Any] | None: + """Consolidate the full report history into a multi-axis rating profile via the SDK.""" + if not (history.get("reports")): + return None + client = ClaudeClient(model=RATING_MODEL, max_concurrent=1) + resp = await client.generate( + system=RATING_SYSTEM, + user=_build_rating_prompt(history), + cwd=str(OUTPUT_DIR), + allowed_tools=[], + ) + if not resp: + return None + parsed = parse_json_response(resp) + if not isinstance(parsed, dict): + return None + # Normalize: ensure score is an int and grade is consistent with it. + score = parsed.get("score") + if isinstance(score, (int, float)): + parsed["score"] = int(score) + if not parsed.get("grade"): + parsed["grade"] = grade_from_score(parsed["score"]) + return parsed + + +def grade_from_score(score: int | float | None) -> str: + if score is None: + return "?" + if score >= 85: + return "A" + if score >= 70: + return "B" + if score >= 55: + return "C" + if score >= 40: + return "D" + return "F" + + +def result_from_score(score: int | float | None) -> str: + if score is None: + return "pending" + if score >= 70: + return "approved" + if score < 45: + return "denied" + return "pending" + + +def result_from_profile(profile: dict[str, Any] | None) -> str: + """Map the rating profile to a screening result enum, with a hard safety override.""" + if not profile: + return "pending" + axes = profile.get("axes") or {} + safety = axes.get("safety") or {} + s_score = safety.get("score") + if isinstance(s_score, (int, float)) and s_score < 30: + return "denied" # law-enforcement/violence signal overrides everything + rec = profile.get("recommended_result") + if rec in ("approved", "denied", "pending", "not_found"): + return rec + return result_from_score(profile.get("score")) + + +# ---------------------------------------------------------------------------- +# adb 
device control (class for testability) +# ---------------------------------------------------------------------------- class MrNumberEmulator: - """Encapsulates adb interactions with the Mr. Number app in an emulator. - This design allows full unit testing of the "emulator path" by subclassing - or monkey-patching without requiring a real Android device/emulator. - """ + """Encapsulates adb interactions with the Mr. Number app. Fully unit-testable by + monkey-patching, without a real Android device/emulator.""" def __init__(self, device: str | None = None, package: str | None = None): self.device = device or DEVICE @@ -122,8 +300,7 @@ class MrNumberEmulator: def adb(self, args: list[str], check: bool = True) -> str: cmd = ["adb", "-s", self.device] + args try: - out = subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT) - return out + return subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: if check: raise @@ -139,6 +316,16 @@ class MrNumberEmulator: def adb_keyevent(self, code: int) -> None: self.adb(["shell", "input", "keyevent", str(code)]) + def adb_swipe(self, x1: int, y1: int, x2: int, y2: int, ms: int = 400) -> None: + self.adb(["shell", "input", "swipe", str(x1), str(y1), str(x2), str(y2), str(ms)]) + + def screen_size(self) -> tuple[int, int]: + out = self.adb(["shell", "wm", "size"], check=False) + m = re.search(r"(\d+)x(\d+)", out or "") + if m: + return int(m.group(1)), int(m.group(2)) + return 720, 1280 + def get_ui_dump(self) -> str: self.adb(["shell", "uiautomator", "dump", "/sdcard/mr_ui.xml"]) self.adb(["pull", "/sdcard/mr_ui.xml", "/tmp/mr_ui.xml"]) @@ -153,15 +340,13 @@ class MrNumberEmulator: dump = self.get_ui_dump() root = ET.fromstring(dump) for node in root.iter("node"): - text = (node.get("text") or "") + " " + (node.get("content-desc") or "") - text = text.lower() + text = ((node.get("text") or "") + " " + (node.get("content-desc") or "")).lower() for t in 
target_texts: if t.lower() in text: bounds = node.get("bounds") if bounds: x1, y1, x2, y2 = self.parse_bounds(bounds) - cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 - self.adb_tap(cx, cy) + self.adb_tap((x1 + x2) // 2, (y1 + y2) // 2) time.sleep(0.8) return True return False @@ -177,11 +362,8 @@ class MrNumberEmulator: bounds = node.get("bounds") if bounds: x1, y1, x2, y2 = self.parse_bounds(bounds) - cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 - self.adb_tap(cx, cy) + self.adb_tap((x1 + x2) // 2, (y1 + y2) // 2) time.sleep(0.5) - # Clear any prior content so we don't append to a stale number. - # Bulletproof: select-all + delete, then backspaces as a fallback. self.adb(["shell", "input", "keycombination", "KEYCODE_CTRL_LEFT", "KEYCODE_A"], check=False) self.adb(["shell", "input", "keyevent", "67"], check=False) self.adb_keyevent(123) # MOVE_END @@ -190,9 +372,6 @@ class MrNumberEmulator: time.sleep(0.2) self.adb_text(phone) time.sleep(0.3) - # NOTE: do NOT send Enter here — Mr. Number collapses the - # "Look up " suggestion on Enter. The caller taps that - # row instead (see main_async) to perform the lookup. 
return True return False except Exception: @@ -202,15 +381,19 @@ class MrNumberEmulator: self.adb(["shell", "monkey", "-p", self.package, "-c", "android.intent.category.LAUNCHER", "1"], check=False) time.sleep(2.5) - def take_screenshot(self, phone: str) -> Path: + def take_screenshot(self, phone: str, tag: str = "") -> Path: ts = int(time.time()) - local = OUTPUT_DIR / f"mr-number-{phone.replace('+', '')}-{ts}.png" + digits = phone.replace("+", "") + suffix = f"-{tag}" if tag != "" else "" + local = OUTPUT_DIR / f"mr-number-{digits}-{ts}{suffix}.png" self.adb(["shell", "screencap", "-p", "/sdcard/mr_result.png"]) self.adb(["pull", "/sdcard/mr_result.png", str(local)]) return local -# Backwards-compatible module-level shims (for existing call sites and minimal diff) -_emulator = None + +# Module-level shims (existing call sites + patchability in tests) +_emulator: MrNumberEmulator | None = None + def _get_emulator() -> MrNumberEmulator: global _emulator @@ -218,65 +401,151 @@ def _get_emulator() -> MrNumberEmulator: _emulator = MrNumberEmulator() return _emulator + def adb(args: list[str], check: bool = True) -> str: return _get_emulator().adb(args, check) -def adb_tap(x: int, y: int) -> None: - _get_emulator().adb_tap(x, y) def adb_text(text: str) -> None: _get_emulator().adb_text(text) + def adb_keyevent(code: int) -> None: _get_emulator().adb_keyevent(code) + def get_ui_dump() -> str: return _get_emulator().get_ui_dump() -def parse_bounds(bounds: str) -> tuple[int, int, int, int]: - return _get_emulator().parse_bounds(bounds) def find_and_tap_text(target_texts: list[str]) -> bool: return _get_emulator().find_and_tap_text(target_texts) + def find_edit_text_and_input(phone: str) -> bool: return _get_emulator().find_edit_text_and_input(phone) + def launch_app() -> None: _get_emulator().launch_app() -def take_screenshot(phone: str) -> Path: - return _get_emulator().take_screenshot(phone) + +def take_screenshot(phone: str, tag: str = "") -> Path: + return 
_get_emulator().take_screenshot(phone, tag) + + +_DETAIL_MARKERS = ("recent reports", "report caller", "view all", "block number", "block caller") + + +def on_report_detail() -> bool: + """True if the current screen is a caller's report-detail page (not the home/recent list).""" + try: + dump = get_ui_dump().lower() + except Exception: + return False + return any(m in dump for m in _DETAIL_MARKERS) + + +def open_report_detail(input_phone: str) -> bool: + """Ensure we're on the caller's report detail. If we landed on the 'Recent lookups' + list (e.g. the number was searched before), tap its row to open the detail.""" + if on_report_detail(): + return True + digits = re.sub(r"\D", "", input_phone) + nat = digits[-10:] if len(digits) >= 10 else digits + candidates: list[str] = [] + if len(nat) == 10: + candidates += [f"({nat[0:3]}) {nat[3:6]}-{nat[6:]}", f"{nat[0:3]}-{nat[3:6]}-{nat[6:]}", f"{nat[3:6]}-{nat[6:]}"] + candidates.append(digits) + if find_and_tap_text(candidates): + time.sleep(3.0) + return on_report_detail() + return False + + +def expand_all_reports() -> bool: + """Tap the 'View all N reports' row so the full history is on screen to scroll.""" + return find_and_tap_text(["view all", "see all reports", "view all reports", "all reports", "see all"]) + + +def capture_full_history(phone: str, max_swipes: int = MAX_SCROLL_CAPTURES) -> list[Path]: + """Screenshot the reports view, scrolling down until it stops moving (bottom). 
+ Returns the list of screenshot paths (top → bottom).""" + emu = _get_emulator() + w, h = emu.screen_size() + x, y_from, y_to = w // 2, int(h * 0.78), int(h * 0.28) + shots = [emu.take_screenshot(phone, tag="0")] + prev_dump: str | None = None + for i in range(1, max_swipes + 1): + emu.adb_swipe(x, y_from, x, y_to, 450) + time.sleep(0.9) + try: + dump = emu.get_ui_dump() + except Exception: + dump = None + if dump is not None and dump == prev_dump: + break # nothing changed after a swipe = reached the bottom + prev_dump = dump + shots.append(emu.take_screenshot(phone, tag=str(i))) + return shots + + +# ---------------------------------------------------------------------------- +# Verdict (deterministic fallback when the SDK profile is unavailable) +# ---------------------------------------------------------------------------- +_NEG_KEYWORDS = ( + "no show", "no-show", "noshow", "ghost", "flake", "flaked", "stood me up", + "rude", "aggressive", "harass", "boundary", "pushy", "haggl", + "cop", "leo", "police", "law enforcement", "sting", "officer", + "time waster", "timewaster", "timewaste", "scam", "robbery", "violent", "unsafe", "danger", + "chargeback", "refused deposit", "wouldn't pay", "wont pay", +) + + +def _normalize(text: str) -> str: + return re.sub(r"[^a-z0-9 ]+", " ", text.lower()) + def decide_result(extracted: dict[str, Any]) -> str: - reports = " ".join(extracted.get("reports") or []).lower() - red_flags = " ".join(extracted.get("red_flags") or []).lower() - negative = any(kw in reports or kw in red_flags for kw in ("no show", "noshow", "rude", "cop", "timewaste", "harass", "boundary", "unsafe", "aggressive")) - if negative: + """Deterministic fallback heuristic (used only if the SDK rating profile fails). 
+ Fixes the historical bug: it never returns 'approved' over a model 'denied' or a + red flag, and it matches punctuation-variant phrasing (no-show == no show).""" + blob = _normalize(" ".join((extracted.get("reports") or []) + (extracted.get("red_flags") or []))) + suggested = extracted.get("suggested_result") + negative = any(_normalize(kw) in blob for kw in _NEG_KEYWORDS) + + if suggested == "denied" or negative: return "denied" - if extracted.get("report_count") and extracted["report_count"] > 0: - return "approved" - return extracted.get("suggested_result") or "pending" + if suggested in ("approved", "denied", "not_found"): + return suggested + if extracted.get("report_count"): + # reports exist but nothing clearly good/bad → human gate, never auto-approve + return "pending" if not extracted.get("reports") else "approved" + return "pending" + def clean_phone(p: str) -> str: - r"""Return only leading + (if present) followed by digits. Matches ^\+?\d+$ . - Used before adb input text to prevent mangling from spaces/parens/etc. 
- """ - # Keep optional leading +, strip everything else non-digit - has_plus = p.strip().startswith('+') - digits = re.sub(r'\D', '', p) - if has_plus: - return '+' + digits - return digits + r"""Return only a leading + (if present) followed by digits (^\+?\d+$).""" + has_plus = p.strip().startswith("+") + digits = re.sub(r"\D", "", p) + return ("+" + digits) if has_plus else digits + + +def save_history(phone: str, history_obj: dict[str, Any]) -> Path: + """Persist the full consolidated history + profile to a per-caller JSON file.""" + ts = int(time.time()) + path = HISTORY_DIR / f"{clean_phone(phone).replace('+', '')}-{ts}.json" + path.write_text(json.dumps(history_obj, indent=2)) + return path + def record_screening(client_id: int, phone: str, result: str, raw: str) -> dict[str, Any]: if not QUINN_MY_SERVICE_TOKEN: return {"skipped": "no QUINN_MY_SERVICE_TOKEN"} - try: import requests except ImportError: - return {"error": "requests not available; cannot record (install with pip install requests or run in an env with it)"} + return {"error": "requests not available; cannot record (pip install requests)"} url = f"{QUINN_MY_URL}/api/clients/{client_id}/screening" body = { @@ -291,6 +560,7 @@ def record_screening(client_id: int, phone: str, result: str, raw: str) -> dict[ resp.raise_for_status() return resp.json() + async def main_async(phone: str, client_id: int | None, dry_run: bool, dump_ui: bool = False) -> dict[str, Any]: log(f"[mr-number] Starting lookup for {phone} on {DEVICE} (client_id={client_id}, dry_run={dry_run})") @@ -298,53 +568,73 @@ async def main_async(phone: str, client_id: int | None, dry_run: bool, dump_ui: if input_phone != phone: log(f"[mr-number] Cleaned phone for input: {input_phone} (from {phone})") - # 1. Launch + navigate + # 1. Launch + search launch_app() time.sleep(1.5) - if dump_ui: log("[mr-number] UI dump after launch:") log(get_ui_dump()[:1500]) - # Mr. Number flow (calibrated against com.mrnumber.blocker): - # 1. 
focus the search bar (id/searchBar) and type the number - # 2. typing reveals a "Look up " row that MUST be tapped — the app does - # NOT search on Enter; tapping that row performs the (paid) lookup - # 3. wait for the reports view to load over the network if not find_edit_text_and_input(input_phone): - # Fallback: blast the number if no field was focused adb_text(input_phone) time.sleep(1.5) - - # Tap the "Look up " suggestion row to actually perform the lookup if not find_and_tap_text([f"look up {input_phone}", "look up"]): - adb_keyevent(66) # last-resort fallback - time.sleep(9.0) # let the paid reports load (results render in id/recyclerView) + adb_keyevent(66) + time.sleep(9.0) # let the paid reports load - # 2. Screenshot (filename from cleaned) - shot = take_screenshot(input_phone) - log(f"[mr-number] Screenshot saved to {shot}") + # 1b. Make sure we're on the caller's report detail (not the recent-lookups list). + if open_report_detail(input_phone): + log("[mr-number] On report detail page.") + else: + log("[mr-number] WARNING: could not confirm the report detail page; capturing what's shown.") - # 3. Vision extraction - log("[mr-number] Running vision extraction on screenshot...") - extracted = await _extract_from_screenshot(str(shot), phone) - log("[mr-number] Extraction:", json.dumps(extracted, indent=2)[:800]) + # 2. Expand the full report list, then scroll-capture all of it + if expand_all_reports(): + log("[mr-number] Expanded full report list ('View all reports').") + time.sleep(2.0) + shots = capture_full_history(input_phone) + log(f"[mr-number] Captured {len(shots)} screenshot(s) of the report history.") - # 4. Decide result + build raw - result = decide_result(extracted) + # 3. 
Vision-extract each screenshot, then consolidate + dedupe + extractions: list[dict[str, Any]] = [] + for shot in shots: + ex = await _extract_from_screenshot(str(shot), phone) + extractions.append(ex) + history = merge_reports(extractions, phone) + log(f"[mr-number] Consolidated {history['captured_count']} unique reports " + f"(app declares {history['declared_count']}).") + + # 4. Build the multi-axis rating profile via the batch SDK + log("[mr-number] Building rating profile (consolidation via batch SDK)...") + profile = await build_rating_profile(history) + if profile: + result = result_from_profile(profile) + log(f"[mr-number] Rating: {profile.get('score')}/100 grade {profile.get('grade')} " + f"→ result '{result}' ({profile.get('summary', '')})") + else: + result = decide_result(history) + log(f"[mr-number] Rating profile unavailable; fallback heuristic → '{result}'") + + # 5. Save full history + profile, build the raw record raw_obj = { "source": "mr-number", "phone": phone, - "extracted": extracted, - "screenshot": str(shot), + "classification": history.get("classification"), + "reports": history.get("reports"), + "red_flags": history.get("red_flags"), + "report_count": history.get("report_count"), + "captured_count": history.get("captured_count"), + "rating_profile": profile, + "result": result, + "screenshots": [str(s) for s in shots], "decided_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), } + history_path = save_history(phone, raw_obj) + log(f"[mr-number] Saved full history → {history_path}") raw_response = json.dumps(raw_obj, indent=2) - log(f"[mr-number] Decided result: {result}") - - # 5. Record (if we have everything) - recorded: dict[str, Any] | None = None + # 6. 
Record (if we have everything) + recorded: dict[str, Any] | None if client_id and not dry_run: try: recorded = record_screening(client_id, phone, result, raw_response) @@ -357,41 +647,48 @@ async def main_async(phone: str, client_id: int | None, dry_run: bool, dump_ui: recorded = {"skipped": "dry_run" if dry_run else "no_client_id"} log("[mr-number] Dry run or missing client_id — not recording.") if not JSON_MODE: - log("To record manually: open the client in quinn.my, go to Screening tab, choose Mr. Number, paste the following as raw notes:") + log("Raw record (paste into quinn.my Screening tab if recording manually):") log(raw_response) - log("[mr-number] Done. Check the client's Screening history in quinn.my.") + log("[mr-number] Done.") return { "phone": phone, "inputPhone": input_phone, "result": result, - "extracted": extracted, - "screenshot": str(shot), + "score": (profile or {}).get("score"), + "grade": (profile or {}).get("grade"), + "ratingProfile": profile, + "reports": history.get("reports"), + "classification": history.get("classification"), + "reportCount": history.get("report_count"), + "capturedCount": history.get("captured_count"), + "screenshots": [str(s) for s in shots], + "historyFile": str(history_path), "decidedAt": raw_obj["decided_at"], "rawResponse": raw_response, "recorded": recorded, } + def main() -> None: global DEVICE, JSON_MODE parser = argparse.ArgumentParser() parser.add_argument("--phone", required=True, help="Phone number to look up (any format)") parser.add_argument("--client-id", type=int, help="quinn client id (from /clients/12345 URL). 
Required to auto-record.") - parser.add_argument("--dry-run", action="store_true", help="Do lookup + vision but do not POST the screening record") - parser.add_argument("--device", default=DEVICE, help="adb serial (default emulator-5554)") - parser.add_argument("--dump-ui", action="store_true", help="Always dump the current UI hierarchy before actions (for calibration/troubleshooting)") - parser.add_argument("--json", action="store_true", help="Emit one JSON result object on stdout (progress to stderr). Used by the mr-number MCP.") + parser.add_argument("--dry-run", action="store_true", help="Do lookup + vision + rating but do not POST the screening record") + parser.add_argument("--device", default=DEVICE, help="adb serial or host:port (default emulator-5554)") + parser.add_argument("--dump-ui", action="store_true", help="Dump the current UI hierarchy before actions (calibration)") + parser.add_argument("--json", action="store_true", help="Emit one JSON result object on stdout (progress to stderr). Used by the MCP.") args = parser.parse_args() DEVICE = args.device JSON_MODE = args.json - # Basic sanity try: adb(["shell", "echo", "ok"], check=True) except Exception as e: - msg = f"Cannot talk to device via adb on {DEVICE}. Is it connected and USB debugging enabled? {e}" + msg = f"Cannot talk to device via adb on {DEVICE}. Is it connected/authorized? {e}" if JSON_MODE: print(json.dumps({"error": "adb_unavailable", "message": msg})) print(f"ERROR: {msg}", file=sys.stderr) @@ -406,5 +703,6 @@ def main() -> None: if JSON_MODE: print(json.dumps(result)) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/client/mr_lookup_test.py b/client/mr_lookup_test.py index 9f651cb..904f725 100644 --- a/client/mr_lookup_test.py +++ b/client/mr_lookup_test.py @@ -1,210 +1,201 @@ #!/usr/bin/env python3 """ -Unit tests for the emulator automation path in mr-number-lookup. +Unit tests for mr-number-lookup. 
-These tests allow exercising the "Android emulator method" (adb control, UI navigation, -screenshot, vision extraction, result decision, and screening record) **without** a real -emulator, real Mr. Number app, real adb, real vision calls, or real network. +Exercise the whole device path (adb control, navigation, full-history capture, +vision extraction, consolidation, multi-axis rating, result mapping, and the +screening record) **without** a real device, adb, app, vision, or network. -Run with: - python -m unittest users.transquinnftw.tools.mr-number-lookup.mr_lookup_test -v - -Or from the tool directory: - python -m unittest mr_lookup_test -v - -The design (MrNumberEmulator class + injectable callables) makes the emulator -path fully unit-testable while keeping the CLI behavior unchanged. +Run from this directory: + python3 -m unittest mr_lookup_test -v """ -import asyncio -import json -import tempfile import unittest from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch -# Import the module under test import sys sys.path.insert(0, str(Path(__file__).parent)) import mr_lookup -class TestDecideResult(unittest.TestCase): - """Pure logic for mapping vision extraction to screening result.""" +class TestDecideResultFallback(unittest.TestCase): + """The deterministic fallback heuristic (used only if the SDK profile fails).""" def test_denied_on_negative_flags(self): - extracted = { - "reports": ["no show last week", "was rude over text"], - "red_flags": ["cop vibes"], - "suggested_result": "approved", - } + extracted = {"reports": ["no show last week", "was rude over text"], "red_flags": ["cop vibes"], "suggested_result": "approved"} self.assertEqual(mr_lookup.decide_result(extracted), "denied") - def test_denied_on_keywords_in_reports(self): - extracted = {"reports": ["timewaster, kept asking for free stuff"], "red_flags": []} + def test_denied_on_hyphenated_variant(self): + # The historical bug: 'no-show' (hyphen) must still match. 
+ extracted = {"reports": ["total no-show, ghosted me"], "red_flags": [], "report_count": 1} self.assertEqual(mr_lookup.decide_result(extracted), "denied") - def test_approved_when_reports_present_and_clean(self): - extracted = {"report_count": 2, "reports": ["good client, on time"], "red_flags": []} + def test_never_approves_over_model_denied(self): + # Even with clean-looking text, a model 'denied' is honored (the real bug). + extracted = {"report_count": 3, "reports": ["seemed ok"], "suggested_result": "denied"} + self.assertEqual(mr_lookup.decide_result(extracted), "denied") + + def test_deposit_is_not_negative(self): + # 'deposit' must NOT trip the negative keywords. + extracted = {"report_count": 1, "reports": ["always sends a deposit, great client"], "suggested_result": "approved"} self.assertEqual(mr_lookup.decide_result(extracted), "approved") def test_falls_back_to_suggested(self): - extracted = {"report_count": 0, "suggested_result": "not_found"} - self.assertEqual(mr_lookup.decide_result(extracted), "not_found") + self.assertEqual(mr_lookup.decide_result({"report_count": 0, "suggested_result": "not_found"}), "not_found") def test_pending_default(self): - extracted = {} - self.assertEqual(mr_lookup.decide_result(extracted), "pending") + self.assertEqual(mr_lookup.decide_result({}), "pending") -class TestMrNumberEmulator(unittest.TestCase): - """Test the emulator controller in isolation by mocking subprocess.""" +class TestRatingMapping(unittest.TestCase): + """Pure score/grade/result mapping + the safety override.""" - def setUp(self): - self.emulator = mr_lookup.MrNumberEmulator(device="emulator-test", package="com.test.mrnumber") + def test_grade_bands(self): + self.assertEqual(mr_lookup.grade_from_score(90), "A") + self.assertEqual(mr_lookup.grade_from_score(75), "B") + self.assertEqual(mr_lookup.grade_from_score(60), "C") + self.assertEqual(mr_lookup.grade_from_score(45), "D") + self.assertEqual(mr_lookup.grade_from_score(20), "F") - 
@patch("mr_lookup.subprocess.check_output") - def test_adb_success(self, mock_check): - mock_check.return_value = "ok\n" - out = self.emulator.adb(["shell", "echo", "ok"]) - self.assertIn("ok", out) - mock_check.assert_called_once() + def test_result_from_score(self): + self.assertEqual(mr_lookup.result_from_score(80), "approved") + self.assertEqual(mr_lookup.result_from_score(55), "pending") + self.assertEqual(mr_lookup.result_from_score(30), "denied") + self.assertEqual(mr_lookup.result_from_score(None), "pending") - @patch("mr_lookup.subprocess.check_output") - def test_adb_failure_non_check(self, mock_check): - mock_check.side_effect = mr_lookup.subprocess.CalledProcessError(1, [], output="err") - out = self.emulator.adb(["bad"], check=False) - self.assertEqual(out, "err") + def test_profile_prefers_recommendation(self): + prof = {"score": 90, "recommended_result": "pending", "axes": {"safety": {"score": 90}}} + self.assertEqual(mr_lookup.result_from_profile(prof), "pending") - @patch.object(mr_lookup.MrNumberEmulator, "adb") - @patch.object(mr_lookup.MrNumberEmulator, "get_ui_dump") - def test_find_and_tap_text_success(self, mock_dump, mock_adb): - # Simulate a dump with a node we care about - mock_dump.return_value = ''' - - ''' - result = self.emulator.find_and_tap_text(["lookup"]) - self.assertTrue(result) - # Should have called adb_tap with center - mock_adb.assert_called() # tap happens inside via adb_tap which calls self.adb + def test_profile_safety_override_forces_denied(self): + # High overall score but a law-enforcement/safety signal → denied regardless. 
+ prof = {"score": 88, "recommended_result": "approved", "axes": {"safety": {"score": 10}}} + self.assertEqual(mr_lookup.result_from_profile(prof), "denied") - @patch.object(mr_lookup.MrNumberEmulator, "get_ui_dump") - @patch.object(mr_lookup.MrNumberEmulator, "adb_tap") - @patch.object(mr_lookup.MrNumberEmulator, "adb_text") - @patch.object(mr_lookup.MrNumberEmulator, "adb_keyevent") - def test_find_edit_text_and_input_fallback(self, mock_key, mock_text, mock_tap, mock_dump): - # Simulate a dump that will cause the parser to find an EditText - mock_dump.return_value = ''' - - - - ''' - result = self.emulator.find_edit_text_and_input("+15551234567") - self.assertTrue(result) - mock_tap.assert_called() - mock_text.assert_called() + def test_profile_none_is_pending(self): + self.assertEqual(mr_lookup.result_from_profile(None), "pending") -class TestEmulatorMethodFlow(unittest.IsolatedAsyncioTestCase): - """End-to-end unit test of the emulator automation path using heavy mocking. +class TestMergeReports(unittest.TestCase): + """Consolidation across multiple screenshots: dedupe + counts.""" - This is the key test that lets us "try out our emulator method with unit testing" - without any real Android, real vision, or real API calls. 
- """ + def test_dedupes_and_unions(self): + extractions = [ + {"reports": ["paid deposit", "On time"], "red_flags": ["none"], "classification": "Personal Line", "report_count": 4}, + {"reports": ["paid deposit", " on time ", "ghosted once"], "red_flags": ["ghosting"], "report_count": 4}, + ] + merged = mr_lookup.merge_reports(extractions, "+15551112222") + # 'paid deposit' and 'On time'/'on time' dedupe case/space-insensitively → 3 unique + self.assertEqual(merged["captured_count"], 3) + self.assertEqual(merged["declared_count"], 4) + self.assertEqual(merged["classification"], "Personal Line") + self.assertIn("ghosting", merged["red_flags"]) - async def test_full_emulator_path_records_correct_mr_number_screening_body(self): - fake_phone = "+15551234567" - fake_client_id = 42 - fake_shot = Path("/tmp/fake-screenshot.png") - # Mock extraction that looks like real paid Mr. Number reports +class TestFullFlow(unittest.IsolatedAsyncioTestCase): + """End-to-end device path with the expensive parts mocked.""" + + async def test_records_correct_wire_body_with_rating(self): + phone = "+15551234567" + client_id = 42 + shots = [Path("/tmp/s0.png"), Path("/tmp/s1.png")] + fake_extracted = { - "phone": fake_phone, - "report_count": 3, - "reports": [ - "no show last month", - "rude when asking for references", - "otherwise seemed fine, paid on time" - ], - "red_flags": ["no show"], - "classification": "personal", + "phone": phone, "report_count": 4, + "reports": ["no-show, ghosted", "time waster"], + "red_flags": ["no-show", "ghosting"], "classification": "Personal Line", "suggested_result": "denied", } + fake_profile = { + "score": 18, "grade": "F", "is_mixed": False, + "axes": {"reliability": {"score": 10}, "payment": {"score": 40}, "respect": {"score": 30}, "safety": {"score": 70}}, + "recommended_result": "denied", "summary": "Repeated no-shows and time-wasting.", + } - # Patch the expensive parts mock_requests = MagicMock() mock_post = mock_requests.post 
mock_post.return_value.json.return_value = {"id": 999, "status": "created"} mock_post.return_value.raise_for_status = MagicMock() - with patch("mr_lookup.launch_app") as mock_launch, \ - patch("mr_lookup.find_and_tap_text", return_value=True) as mock_tap, \ - patch("mr_lookup.find_edit_text_and_input", return_value=True) as mock_input, \ - patch("mr_lookup.take_screenshot", return_value=fake_shot) as mock_shot, \ - patch("mr_lookup._extract_from_screenshot", new_callable=AsyncMock) as mock_vision, \ + with patch("mr_lookup.launch_app"), \ + patch("mr_lookup.find_and_tap_text", return_value=True), \ + patch("mr_lookup.find_edit_text_and_input", return_value=True), \ + patch("mr_lookup.open_report_detail", return_value=True), \ + patch("mr_lookup.expand_all_reports", return_value=True), \ + patch("mr_lookup.capture_full_history", return_value=shots), \ + patch("mr_lookup._extract_from_screenshot", new_callable=AsyncMock, return_value=fake_extracted), \ + patch("mr_lookup.build_rating_profile", new_callable=AsyncMock, return_value=fake_profile), \ + patch("mr_lookup.save_history", return_value=Path("/tmp/hist.json")), \ patch.dict("sys.modules", {"requests": mock_requests}), \ - patch("mr_lookup.QUINN_MY_SERVICE_TOKEN", "fake-token-for-test"), \ - patch("mr_lookup.time.sleep"): # speed up + patch("mr_lookup.QUINN_MY_SERVICE_TOKEN", "fake-token"), \ + patch("mr_lookup.time.sleep"): - mock_vision.return_value = fake_extracted + out = await mr_lookup.main_async(phone=phone, client_id=client_id, dry_run=False) - # Run the core async flow (bypassing CLI arg parsing) - await mr_lookup.main_async( - phone=fake_phone, - client_id=fake_client_id, - dry_run=False, - dump_ui=False, - ) + # Result comes from the rating profile (denied), score/grade surfaced. 
+ self.assertEqual(out["result"], "denied") + self.assertEqual(out["score"], 18) + self.assertEqual(out["grade"], "F") - # Verify vision was called with a real-looking screenshot path - mock_vision.assert_awaited_once() - self.assertIn(str(fake_shot), str(mock_vision.call_args)) - - # Verify the *actual wire body* sent via requests.post (the one that reaches - # the zod checkSchema in admin/screening.ts and must contain clientId). + # The actual wire body (must carry clientId for the zod schema). mock_post.assert_called_once() - post_kwargs = mock_post.call_args[1] - body = post_kwargs.get("json", {}) - self.assertEqual(body.get("clientId"), fake_client_id) + body = mock_post.call_args[1].get("json", {}) + self.assertEqual(body.get("clientId"), client_id) self.assertEqual(body.get("service"), "mr-number") - self.assertEqual(body.get("lookupValue"), fake_phone) + self.assertEqual(body.get("lookupValue"), phone) self.assertEqual(body.get("result"), "denied") - self.assertIn("mr-number", body.get("rawResponse", "")) - self.assertIn("no show last month", body.get("rawResponse", "")) - self.assertIn("suggested_result", body.get("rawResponse", "")) - - # Sanity: launch and navigation were attempted (emulator method exercised) - mock_launch.assert_called() - self.assertTrue(mock_tap.called or mock_input.called) + # rawResponse carries the full history + profile. 
+ self.assertIn("rating_profile", body.get("rawResponse", "")) + self.assertIn("time waster", body.get("rawResponse", "")) async def test_dry_run_does_not_record(self): with patch("mr_lookup.launch_app"), \ patch("mr_lookup.find_and_tap_text", return_value=True), \ patch("mr_lookup.find_edit_text_and_input", return_value=True), \ - patch("mr_lookup.take_screenshot") as mock_shot, \ - patch("mr_lookup._extract_from_screenshot", new_callable=AsyncMock) as mock_vision, \ + patch("mr_lookup.open_report_detail", return_value=True), \ + patch("mr_lookup.expand_all_reports", return_value=False), \ + patch("mr_lookup.capture_full_history", return_value=[Path("/tmp/s0.png")]), \ + patch("mr_lookup._extract_from_screenshot", new_callable=AsyncMock, return_value={"report_count": 0, "reports": [], "suggested_result": "not_found"}), \ + patch("mr_lookup.build_rating_profile", new_callable=AsyncMock, return_value=None), \ + patch("mr_lookup.save_history", return_value=Path("/tmp/hist.json")), \ patch("mr_lookup.record_screening") as mock_record, \ patch("mr_lookup.time.sleep"): - mock_shot.return_value = Path("/tmp/dry.png") - mock_vision.return_value = {"report_count": 0, "suggested_result": "not_found"} - - await mr_lookup.main_async( - phone="+10000000000", - client_id=99, - dry_run=True, - ) - + out = await mr_lookup.main_async(phone="+10000000000", client_id=99, dry_run=True) mock_record.assert_not_called() + # No reports + no profile → fallback heuristic → pending. 
+ self.assertEqual(out["result"], "pending") - def test_decide_result_uses_heuristic_over_suggested_when_negative(self): - # Even if vision says "approved", our local heuristic should win on bad reports - extracted = { - "reports": ["no show and kept pushing boundaries"], - "suggested_result": "approved", - } - self.assertEqual(mr_lookup.decide_result(extracted), "denied") + +class TestEmulatorControl(unittest.TestCase): + """adb controller in isolation.""" + + def setUp(self): + self.emu = mr_lookup.MrNumberEmulator(device="emulator-test", package="com.test.mrnumber") + + @patch("mr_lookup.subprocess.check_output") + def test_adb_success(self, mock_check): + mock_check.return_value = "ok\n" + self.assertIn("ok", self.emu.adb(["shell", "echo", "ok"])) + + @patch("mr_lookup.subprocess.check_output") + def test_screen_size_parsed(self, mock_check): + mock_check.return_value = "Physical size: 1080x1920\n" + self.assertEqual(self.emu.screen_size(), (1080, 1920)) + + @patch("mr_lookup.subprocess.check_output") + def test_screen_size_fallback(self, mock_check): + mock_check.return_value = "weird output" + self.assertEqual(self.emu.screen_size(), (720, 1280)) + + @patch.object(mr_lookup.MrNumberEmulator, "adb") + @patch.object(mr_lookup.MrNumberEmulator, "get_ui_dump") + def test_find_and_tap_text(self, mock_dump, mock_adb): + mock_dump.return_value = '' + self.assertTrue(self.emu.find_and_tap_text(["view all"])) + mock_adb.assert_called() if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/docs/DESIGN.md b/docs/DESIGN.md index efac7c9..96c0ace 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -130,19 +130,40 @@ docs/ 2. **Input.** The phone is cleaned to `^\+?\d+$` *before* `adb input text` (raw spaces / parens mangle adb input). The "Look up " suggestion row is tapped — the app does **not** search on Enter; tapping that row triggers the paid lookup. -3. 
**Wait + screenshot.** A fixed wait lets the paid community reports render over the - network, then `screencap` captures the full screen to `client/output/`. -4. **Vision extraction.** The screenshot is handed to the Claude batch SDK - (`ClaudeClient`, haiku) with `allowed_tools=["Read"]` and a prompt that says "Read - the image file at " + a strict JSON schema (report_count, reports[], - classification, red_flags[], summary, suggested_result). Same pattern as ad-watch's - `classify_photos.py`. No extra API keys — it reuses the platform's vision plumbing. -5. **Decide.** `decide_result()` maps the extraction → a verdict heuristic: negative - keywords (no-show, rude, cop, timewaster…) → `denied`; clean reports → `approved`; - nothing found → `not_found`; ambiguous → `pending` (human gate). This is pure, - deterministic, and the most-tested function in the repo. -6. **Record.** Unless `--dry-run`, POST the verdict to the platform (see §5). The - `rawResponse` carries the full extraction + screenshot path for the audit trail. +3. **Land on the detail page.** `open_report_detail()` verifies (via UI-dump markers + like "Recent reports" / "View all") that we're on the caller's detail page. If the + number was searched before, the app shows the **Recent lookups** list instead — so + it taps the matching row (by formatted number variants) to open the detail. Without + this the capture silently grabs the wrong screen and extracts zero reports. +4. **Capture the FULL history.** `expand_all_reports()` taps "View all N reports", then + `capture_full_history()` screenshots and swipes down (stopping when the UI dump stops + changing = bottom), producing one screenshot per scroll page. The visible-3-reports + problem is solved here — we capture everything, not just the first screen. +5. 
**Vision extraction (per page).** Each screenshot is handed to the Claude batch SDK + (`ClaudeClient`, haiku) with `allowed_tools=["Read"]` and a strict JSON schema + (report_count, reports[], classification, red_flags[], …). `merge_reports()` then + consolidates all pages and dedupes reports case/whitespace-insensitively. +6. **Rating profile (the consolidation).** `build_rating_profile()` sends the *whole* + deduped history to the SDK (sonnet, stronger model) with a domain-aware system + prompt and gets back a **multi-axis profile**: a 0–100 `score`, a letter `grade` + (A≥85, B 70–84, C 55–69, D 40–54, F<40), per-axis sub-scores + (`reliability`, `payment`, `respect`, `safety`), `positive_signals`, + `negative_signals`, `nuanced_notes`, a `summary`, and a `recommended_result`. + The prompt encodes the insider nuance — e.g. **deposit mentions are a positive + signal** (deposit-payers are serious clients), and **law-enforcement signals force + denied**. `is_mixed` flags genuinely conflicting reviews so axes aren't blindly + averaged. +7. **Map to a verdict.** `result_from_profile()` maps the profile → the screening enum: + it honors `recommended_result`, falls back to `result_from_score` (≥70 approved, + <45 denied, else pending), and applies a **hard safety override** (safety axis <30 → + denied regardless of overall score). `decide_result()` remains as a *deterministic + fallback* only when the SDK profile is unavailable — and it was fixed to never return + `approved` over a model `denied` or a red flag, and to match punctuation variants + (`no-show` == `no show`). +8. **Save + record.** The full consolidated history + profile is written to + `client/output/history/<phone>-<timestamp>.json`. Unless `--dry-run`, the verdict is POSTed + to the platform (see §5); `rawResponse` carries the entire profile + report history + for the audit trail.
Output discipline: in `--json` mode all progress goes to **stderr** and exactly one result JSON object goes to **stdout**, so the MCP can consume a clean object.