Replace the brittle keyword verdict with an LLM-consolidated rating profile per caller, and capture the COMPLETE report history instead of the first screen.

- open_report_detail(): land on the caller detail page (taps the Recent-lookups row when the number was searched before) — fixes the 0-reports regression
- expand_all_reports() + capture_full_history(): tap "View all N", scroll-capture every page until the UI dump stops changing; merge_reports() dedupes across pages
- build_rating_profile() (batch SDK, sonnet): 0-100 score + A–F grade + per-axis sub-scores (reliability/payment/respect/safety) + signals + nuanced_notes. Domain nuance: deposit mentions weight POSITIVE; law-enforcement forces denied
- result_from_profile(): honors recommendation, score fallback, hard safety override
- decide_result(): kept as deterministic fallback, fixed to never approve over a model 'denied' / red flag and to match punctuation variants (no-show == no show)
- save_history(): persist full consolidated history + profile per caller
- tests: 18/18 (mapping, dedupe, safety override, full flow); DESIGN.md updated

Verified live against the redroid droplet (45.55.191.82): 15166687821 → 3 reports consolidated → 18/100 grade F → denied, with multi-axis breakdown.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
201 lines
9.6 KiB
Python
201 lines
9.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Unit tests for mr-number-lookup.

Exercise the whole device path (adb control, navigation, full-history capture,
vision extraction, consolidation, multi-axis rating, result mapping, and the
screening record) **without** a real device, adb, app, vision, or network.

Run from this directory:
    python3 -m unittest mr_lookup_test -v
|
|
"""
|
|
|
|
import sys
import unittest
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

# Put this file's directory first on sys.path before importing the module
# under test (presumably so the sibling ``mr_lookup`` module resolves when the
# tests are launched from elsewhere -- confirm against the repo layout).
sys.path.insert(0, str(Path(__file__).parent))

import mr_lookup
|
|
|
|
|
|
class TestDecideResultFallback(unittest.TestCase):
    """Checks for ``decide_result``, the deterministic fallback heuristic.

    Per the original author's note, the heuristic is used only if the SDK
    profile fails.
    """

    def _verdict(self, extracted):
        """Return what ``mr_lookup.decide_result`` yields for *extracted*."""
        return mr_lookup.decide_result(extracted)

    def test_denied_on_negative_flags(self):
        payload = {
            "reports": ["no show last week", "was rude over text"],
            "red_flags": ["cop vibes"],
            "suggested_result": "approved",
        }
        self.assertEqual(self._verdict(payload), "denied")

    def test_denied_on_hyphenated_variant(self):
        # Historical bug: the hyphenated spelling 'no-show' must still match.
        payload = {
            "reports": ["total no-show, ghosted me"],
            "red_flags": [],
            "report_count": 1,
        }
        self.assertEqual(self._verdict(payload), "denied")

    def test_never_approves_over_model_denied(self):
        # The real bug: a model 'denied' is honored even when the report text
        # itself looks clean.
        payload = {
            "report_count": 3,
            "reports": ["seemed ok"],
            "suggested_result": "denied",
        }
        self.assertEqual(self._verdict(payload), "denied")

    def test_deposit_is_not_negative(self):
        # The word 'deposit' must NOT trip the negative keywords.
        payload = {
            "report_count": 1,
            "reports": ["always sends a deposit, great client"],
            "suggested_result": "approved",
        }
        self.assertEqual(self._verdict(payload), "approved")

    def test_falls_back_to_suggested(self):
        payload = {"report_count": 0, "suggested_result": "not_found"}
        self.assertEqual(self._verdict(payload), "not_found")

    def test_pending_default(self):
        # An empty extraction carries no signal at all.
        self.assertEqual(self._verdict({}), "pending")
|
|
|
|
|
|
class TestRatingMapping(unittest.TestCase):
    """Score -> grade, score -> result and profile -> result mapping checks.

    Also covers the safety override in ``result_from_profile``.
    """

    def test_grade_bands(self):
        # One representative score per letter grade, checked in order.
        expected_grades = ((90, "A"), (75, "B"), (60, "C"), (45, "D"), (20, "F"))
        for score, grade in expected_grades:
            self.assertEqual(mr_lookup.grade_from_score(score), grade)

    def test_result_from_score(self):
        # A missing score (None) is expected to map to "pending".
        expected_results = (
            (80, "approved"),
            (55, "pending"),
            (30, "denied"),
            (None, "pending"),
        )
        for score, result in expected_results:
            self.assertEqual(mr_lookup.result_from_score(score), result)

    def test_profile_prefers_recommendation(self):
        # The explicit recommendation wins over what a score of 90 would give.
        profile = {
            "score": 90,
            "recommended_result": "pending",
            "axes": {"safety": {"score": 90}},
        }
        self.assertEqual(mr_lookup.result_from_profile(profile), "pending")

    def test_profile_safety_override_forces_denied(self):
        # High overall score but a law-enforcement/safety signal → denied regardless.
        profile = {
            "score": 88,
            "recommended_result": "approved",
            "axes": {"safety": {"score": 10}},
        }
        self.assertEqual(mr_lookup.result_from_profile(profile), "denied")

    def test_profile_none_is_pending(self):
        self.assertEqual(mr_lookup.result_from_profile(None), "pending")
|
|
|
|
|
|
class TestMergeReports(unittest.TestCase):
    """``merge_reports`` consolidation across screenshots: dedupe + counts."""

    def test_dedupes_and_unions(self):
        first_page = {
            "reports": ["paid deposit", "On time"],
            "red_flags": ["none"],
            "classification": "Personal Line",
            "report_count": 4,
        }
        second_page = {
            "reports": ["paid deposit", " on time ", "ghosted once"],
            "red_flags": ["ghosting"],
            "report_count": 4,
        }

        merged = mr_lookup.merge_reports([first_page, second_page], "+15551112222")

        # 'paid deposit' and 'On time'/' on time ' are expected to dedupe
        # case/space-insensitively, leaving 3 unique reports.
        self.assertEqual(merged["captured_count"], 3)
        self.assertEqual(merged["declared_count"], 4)
        self.assertEqual(merged["classification"], "Personal Line")
        self.assertIn("ghosting", merged["red_flags"])
|
|
|
|
|
|
class TestFullFlow(unittest.IsolatedAsyncioTestCase):
    """Run ``mr_lookup.main_async`` end to end with the expensive parts mocked."""

    def _start(self, patcher):
        """Activate *patcher* now and register its undo for test cleanup."""
        started = patcher.start()
        self.addCleanup(patcher.stop)
        return started

    def _mock_device_path(self, *, expanded, shots, extracted, profile):
        """Patch the launch, navigation, capture, extraction, rating and history steps."""
        self._start(patch("mr_lookup.launch_app"))
        self._start(patch("mr_lookup.find_and_tap_text", return_value=True))
        self._start(patch("mr_lookup.find_edit_text_and_input", return_value=True))
        self._start(patch("mr_lookup.open_report_detail", return_value=True))
        self._start(patch("mr_lookup.expand_all_reports", return_value=expanded))
        self._start(patch("mr_lookup.capture_full_history", return_value=shots))
        self._start(
            patch(
                "mr_lookup._extract_from_screenshot",
                new_callable=AsyncMock,
                return_value=extracted,
            )
        )
        self._start(
            patch(
                "mr_lookup.build_rating_profile",
                new_callable=AsyncMock,
                return_value=profile,
            )
        )
        self._start(patch("mr_lookup.save_history", return_value=Path("/tmp/hist.json")))

    async def test_records_correct_wire_body_with_rating(self):
        phone = "+15551234567"
        client_id = 42
        screenshots = [Path("/tmp/s0.png"), Path("/tmp/s1.png")]

        extraction = {
            "phone": phone,
            "report_count": 4,
            "reports": ["no-show, ghosted", "time waster"],
            "red_flags": ["no-show", "ghosting"],
            "classification": "Personal Line",
            "suggested_result": "denied",
        }
        rating = {
            "score": 18,
            "grade": "F",
            "is_mixed": False,
            "axes": {
                "reliability": {"score": 10},
                "payment": {"score": 40},
                "respect": {"score": 30},
                "safety": {"score": 70},
            },
            "recommended_result": "denied",
            "summary": "Repeated no-shows and time-wasting.",
        }

        # Stand-in for the ``requests`` module, injected through sys.modules.
        fake_requests = MagicMock()
        post = fake_requests.post
        post.return_value.json.return_value = {"id": 999, "status": "created"}
        post.return_value.raise_for_status = MagicMock()

        self._mock_device_path(
            expanded=True,
            shots=screenshots,
            extracted=extraction,
            profile=rating,
        )
        self._start(patch.dict("sys.modules", {"requests": fake_requests}))
        self._start(patch("mr_lookup.QUINN_MY_SERVICE_TOKEN", "fake-token"))
        self._start(patch("mr_lookup.time.sleep"))

        out = await mr_lookup.main_async(phone=phone, client_id=client_id, dry_run=False)

        # Result comes from the rating profile (denied), score/grade surfaced.
        self.assertEqual(out["result"], "denied")
        self.assertEqual(out["score"], 18)
        self.assertEqual(out["grade"], "F")

        # The actual wire body (must carry clientId for the zod schema).
        post.assert_called_once()
        body = post.call_args.kwargs.get("json", {})
        self.assertEqual(body.get("clientId"), client_id)
        self.assertEqual(body.get("service"), "mr-number")
        self.assertEqual(body.get("lookupValue"), phone)
        self.assertEqual(body.get("result"), "denied")

        # rawResponse carries the full history + profile.
        self.assertIn("rating_profile", body.get("rawResponse", ""))
        self.assertIn("time waster", body.get("rawResponse", ""))

    async def test_dry_run_does_not_record(self):
        self._mock_device_path(
            expanded=False,
            shots=[Path("/tmp/s0.png")],
            extracted={"report_count": 0, "reports": [], "suggested_result": "not_found"},
            profile=None,
        )
        record_mock = self._start(patch("mr_lookup.record_screening"))
        self._start(patch("mr_lookup.time.sleep"))

        out = await mr_lookup.main_async(phone="+10000000000", client_id=99, dry_run=True)

        record_mock.assert_not_called()
        # No reports + no profile → fallback heuristic → pending.
        self.assertEqual(out["result"], "pending")
|
|
|
|
|
|
class TestEmulatorControl(unittest.TestCase):
    """Exercise ``MrNumberEmulator`` on its own, with its adb calls mocked."""

    def setUp(self):
        self.emu = mr_lookup.MrNumberEmulator(
            device="emulator-test",
            package="com.test.mrnumber",
        )

    def test_adb_success(self):
        with patch("mr_lookup.subprocess.check_output", return_value="ok\n"):
            output = self.emu.adb(["shell", "echo", "ok"])
            self.assertIn("ok", output)

    def test_screen_size_parsed(self):
        with patch(
            "mr_lookup.subprocess.check_output",
            return_value="Physical size: 1080x1920\n",
        ):
            self.assertEqual(self.emu.screen_size(), (1080, 1920))

    def test_screen_size_fallback(self):
        # Unparseable output is expected to yield the 720x1280 default.
        with patch("mr_lookup.subprocess.check_output", return_value="weird output"):
            self.assertEqual(self.emu.screen_size(), (720, 1280))

    def test_find_and_tap_text(self):
        ui_xml = '<node text="View all 4 reports" bounds="[100,200][300,400]" />'
        with patch.object(mr_lookup.MrNumberEmulator, "get_ui_dump", return_value=ui_xml), \
                patch.object(mr_lookup.MrNumberEmulator, "adb") as adb_mock:
            self.assertTrue(self.emu.find_and_tap_text(["view all"]))
            # A match must result in at least one adb invocation.
            adb_mock.assert_called()
|
|
|
|
|
|
# Run the suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|