Owns the box-side services for the lilith screening tools (Mr. Number, WhatsApp), extracted from the duplicated cloud/ dirs in @mr-number / @whatsapp: - cloud/adb-keyboard, cloud/ocr-service (mrnumber-ocr systemd unit), cloud/terraform (read-only IaC reference; droplet owned by uvlava). - deploy/deploy-droplet.sh — push + (re)start the box services. The screening apps drive this box over adb; they no longer carry their own copies. Shared client code lives in @lilith/redroid-client (PyPI) + @lilith/redroid-mcp (Verdaccio). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
134 lines
5.1 KiB
Python
134 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
"""On-the-fly OCR for the redroid Android screen (DigitalOcean box, loopback only).
|
|
|
|
Runs ON the redroid droplet next to adb. Captures the live redroid screen (or an
|
|
uploaded image) and returns tesseract OCR text — so the lookup tools (mr-number,
|
|
whatsapp, etc.) can extract without the claude-code-batch-sdk vision path (which
|
|
needs API access the box doesn't have). Binds 127.0.0.1 only; reach it from plum
|
|
over the same key-authed SSH tunnel as the console (see console-tray). The tray
|
|
now forwards 8003.
|
|
|
|
Endpoints:
|
|
GET /health -> {"ok": true}
|
|
GET /ocr[?psm=6] -> screencap the current redroid screen + OCR it
|
|
POST /ocr -> OCR the PNG/JPG in the request body (psm via ?psm=)
|
|
|
|
Env:
|
|
REDROID_SERIAL adb serial to screencap (default "localhost:5555")
|
|
OCR_PORT listen port (default 8003)
|
|
TESSERACT_BIN tesseract path (default "tesseract")
|
|
ADB_BIN adb path (default "adb")
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
# Runtime configuration, resolved once at import time from the environment.
# Each setting falls back to the default documented in the module docstring.

# adb serial of the device whose screen gets captured.
REDROID_SERIAL = os.getenv("REDROID_SERIAL", "localhost:5555")

# TCP port the HTTP server listens on; int() raises ValueError at import if
# OCR_PORT is set to something that is not an integer.
PORT = int(os.getenv("OCR_PORT", "8003"))

# Executable used to run OCR.
TESSERACT = os.getenv("TESSERACT_BIN", "tesseract")

# Executable used to talk to the device.
ADB = os.getenv("ADB_BIN", "adb")
|
|
|
|
|
|
def screencap() -> bytes:
    """Capture the current redroid screen and return it as PNG bytes.

    Runs ``adb -s REDROID_SERIAL exec-out screencap -p`` with a 30-second
    timeout and returns whatever the command wrote to stdout.

    Returns:
        The raw stdout of the adb command (the PNG image).

    Raises:
        RuntimeError: adb exited non-zero or produced no output. The message
            carries the return code and at most 300 characters of stderr.
        subprocess.TimeoutExpired: adb did not finish within 30 seconds.
    """
    command = [ADB, "-s", REDROID_SERIAL, "exec-out", "screencap", "-p"]
    result = subprocess.run(command, capture_output=True, timeout=30)

    # Success needs both a clean exit and a non-empty image.
    if result.returncode == 0 and result.stdout:
        return result.stdout

    stderr_head = result.stderr.decode("utf-8", "replace")[:300]
    raise RuntimeError(f"screencap failed (rc={result.returncode}): {stderr_head}")
|
|
|
|
|
|
def ocr(png: bytes, psm: int = 6) -> str:
    """OCR an image with the tesseract command-line tool.

    The image bytes are written to a temporary ``.png`` file whose path is
    handed to tesseract; the recognized text is read back from its stdout.

    Args:
        png: Encoded image bytes to recognize.
        psm: Value passed to tesseract's ``--psm`` option.

    Returns:
        Tesseract's stdout decoded as UTF-8 (undecodable bytes replaced).

    Raises:
        RuntimeError: tesseract exited non-zero. The message carries the
            return code and at most 300 characters of stderr.
        subprocess.TimeoutExpired: tesseract did not finish within 60 seconds.
    """
    with tempfile.NamedTemporaryFile(suffix=".png") as image_file:
        image_file.write(png)
        # Push buffered bytes to disk before the child process opens the path.
        image_file.flush()
        command = [TESSERACT, image_file.name, "stdout", "--psm", str(psm)]
        result = subprocess.run(command, capture_output=True, timeout=60)

    if result.returncode == 0:
        return result.stdout.decode("utf-8", "replace")

    stderr_head = result.stderr.decode("utf-8", "replace")[:300]
    raise RuntimeError(f"tesseract failed (rc={result.returncode}): {stderr_head}")
|
|
|
|
|
|
def _psm(query: str) -> int:
|
|
try:
|
|
return int(parse_qs(query).get("psm", ["6"])[0])
|
|
except (ValueError, IndexError):
|
|
return 6
|
|
|
|
|
|
class Handler(BaseHTTPRequestHandler):
    """HTTP request handler for the OCR service.

    Routes:
        GET  /health  -> ``{"ok": true}`` (never subject to the browser check)
        GET  /ocr     -> screencap the device and OCR the result
        POST /ocr     -> OCR the image carried in the request body

    Every response is a JSON object. Failures carry ``"ok": false`` and an
    ``"error"`` string; OCR and screencap exceptions are reported as HTTP 500
    with the exception text.
    """

    # Largest request body do_POST will read into memory. Larger uploads are
    # refused with 413 instead of being buffered.
    MAX_BODY_BYTES = 32 * 1024 * 1024

    def _csrf_ok(self) -> bool:
        """Return True unless the request looks like it came from a web page.

        This service serves no pages, so no legitimate caller is a browser
        script; the tools and curl send neither header checked here. The goal
        is that a web page open while the SSH tunnel is up cannot trigger
        screencaps of the signed-in Android session.

        Two signals are checked:
          * ``Origin``: any value is rejected.
          * ``Sec-Fetch-Site``: rejected unless it is ``same-origin`` or
            ``none``. Browsers leave ``Origin`` off plain cross-site GETs
            (``<img src=...>``, navigations), so ``Origin`` alone does not
            cover GET /ocr. This is best effort, since browsers that do not
            send Fetch Metadata headers are not caught by it.
        """
        if self.headers.get("Origin") is not None:
            return False
        fetch_site = self.headers.get("Sec-Fetch-Site")
        if fetch_site is None:
            return True
        return fetch_site.strip().lower() in ("same-origin", "none")

    def _send(self, code: int, payload: dict) -> None:
        """Write ``payload`` as a JSON response with HTTP status ``code``."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _ocr_payload(self, text: str) -> dict:
        """Build the success payload for recognized ``text``.

        ``lines`` holds the stripped, non-blank lines of ``text``; ``text`` is
        returned unmodified alongside the configured adb serial.
        """
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
        return {"ok": True, "serial": REDROID_SERIAL, "text": text, "lines": lines}

    def do_GET(self) -> None:  # noqa: N802 (http.server API)
        """Serve /health and the screencap-and-OCR form of /ocr."""
        parsed = urlparse(self.path)
        # Health is answered before the browser check so probes always work.
        if parsed.path == "/health":
            self._send(200, {"ok": True})
            return
        if not self._csrf_ok():
            self._send(403, {"ok": False, "error": "forbidden origin"})
            return
        if parsed.path == "/ocr":
            try:
                self._send(200, self._ocr_payload(ocr(screencap(), _psm(parsed.query))))
            except Exception as e:  # surface the real failure to the caller
                self._send(500, {"ok": False, "error": str(e)})
            return
        self._send(404, {"ok": False, "error": "not found"})

    def do_POST(self) -> None:  # noqa: N802
        """OCR the image uploaded in the body of ``POST /ocr``.

        Responds 400 when Content-Length is missing, non-numeric or not
        positive, and 413 when it exceeds ``MAX_BODY_BYTES``.
        """
        if not self._csrf_ok():
            self._send(403, {"ok": False, "error": "forbidden origin"})
            return
        parsed = urlparse(self.path)
        if parsed.path != "/ocr":
            self._send(404, {"ok": False, "error": "not found"})
            return

        # A malformed header must produce a response, not an unhandled
        # ValueError that drops the connection.
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send(400, {"ok": False, "error": "invalid Content-Length"})
            return
        if length <= 0:
            self._send(400, {"ok": False, "error": "empty body — POST a PNG/JPG image"})
            return
        if length > self.MAX_BODY_BYTES:
            self._send(
                413,
                {"ok": False, "error": f"body too large (max {self.MAX_BODY_BYTES} bytes)"},
            )
            return

        try:
            self._send(200, self._ocr_payload(ocr(self.rfile.read(length), _psm(parsed.query))))
        except Exception as e:  # surface the real failure to the caller
            self._send(500, {"ok": False, "error": str(e)})

    def log_message(self, *_args) -> None:  # quiet; journald captures stderr
        """Suppress the base class's per-request logging."""
        pass
|
|
|
|
|
|
def main() -> None:
    """Announce the listener on stdout, then serve requests until killed.

    The server binds 127.0.0.1 only, on ``PORT``, and handles each request
    with ``Handler`` via ``ThreadingHTTPServer``.
    """
    banner = f"mrnumber-ocr on 127.0.0.1:{PORT} (redroid serial {REDROID_SERIAL})"
    print(banner, flush=True)

    listen_address = ("127.0.0.1", PORT)
    server = ThreadingHTTPServer(listen_address, Handler)
    server.serve_forever()


# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|