redroid/cloud/ocr-service/server.py
Natalie b6709a755d feat(redroid): the shared redroid Android box app
Owns the box-side services for the lilith screening tools (Mr. Number, WhatsApp),
extracted from the duplicated cloud/ dirs in @mr-number / @whatsapp:
- cloud/adb-keyboard, cloud/ocr-service (mrnumber-ocr systemd unit), cloud/terraform
  (read-only IaC reference; droplet owned by uvlava).
- deploy/deploy-droplet.sh — push + (re)start the box services.

The screening apps drive this box over adb; they no longer carry their own copies.
Shared client code lives in @lilith/redroid-client (PyPI) + @lilith/redroid-mcp (Verdaccio).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 15:07:59 -04:00

134 lines
5.1 KiB
Python

#!/usr/bin/env python3
"""On-the-fly OCR for the redroid Android screen (DigitalOcean box, loopback only).

Runs ON the redroid droplet next to adb. Captures the live redroid screen (or an
uploaded image) and returns tesseract OCR text — so the lookup tools (mr-number,
whatsapp, etc.) can extract without the claude-code-batch-sdk vision path (which
needs API access the box doesn't have). Binds 127.0.0.1 only; reach it from plum
over the same key-authed SSH tunnel as the console (see console-tray). The tray
now forwards 8003.

Endpoints:
  GET  /health       -> {"ok": true}
  GET  /ocr[?psm=6]  -> screencap the current redroid screen + OCR it
  POST /ocr          -> OCR the PNG/JPG in the request body (psm via ?psm=)

Env:
  REDROID_SERIAL  adb serial to screencap (default "localhost:5555")
  OCR_PORT        listen port (default 8003)
  TESSERACT_BIN   tesseract path (default "tesseract")
  ADB_BIN         adb path (default "adb")
"""
from __future__ import annotations
import json
import os
import subprocess
import tempfile
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import urlparse, parse_qs
# Runtime configuration, read once from the environment at import time.
# adb serial of the device whose screen is captured.
REDROID_SERIAL = os.getenv("REDROID_SERIAL", "localhost:5555")
# TCP port the HTTP server listens on (parsed as an integer).
PORT = int(os.getenv("OCR_PORT", "8003"))
# Executable used to run OCR.
TESSERACT = os.getenv("TESSERACT_BIN", "tesseract")
# Executable used to talk to the device.
ADB = os.getenv("ADB_BIN", "adb")
def screencap() -> bytes:
    """Capture the current redroid screen via ``adb exec-out screencap -p``.

    Returns:
        The raw bytes adb wrote to stdout (the PNG image).

    Raises:
        RuntimeError: adb exited non-zero or produced no output; the message
            carries the return code and up to 300 characters of stderr.
        subprocess.TimeoutExpired: adb did not finish within 30 seconds.
    """
    command = [ADB, "-s", REDROID_SERIAL, "exec-out", "screencap", "-p"]
    result = subprocess.run(command, capture_output=True, timeout=30)
    if result.returncode == 0 and result.stdout:
        return result.stdout
    # Keep the error short: only the head of stderr is forwarded to the caller.
    detail = result.stderr.decode("utf-8", "replace")[:300]
    raise RuntimeError(f"screencap failed (rc={result.returncode}): {detail}")
def ocr(png: bytes, psm: int = 6) -> str:
    """OCR an image with tesseract and return the text it recognized.

    The bytes are spooled to a temporary ``.png`` file because tesseract is
    invoked with a file path; the file is removed when the ``with`` block exits.

    Args:
        png: Encoded image bytes to recognize.
        psm: Value passed to tesseract's ``--psm`` option.

    Returns:
        tesseract's stdout decoded as UTF-8 (undecodable bytes replaced).

    Raises:
        RuntimeError: tesseract exited non-zero; the message carries the return
            code and up to 300 characters of stderr.
        subprocess.TimeoutExpired: tesseract did not finish within 60 seconds.
    """
    with tempfile.NamedTemporaryFile(suffix=".png") as image_file:
        image_file.write(png)
        # Flush so tesseract, a separate process, sees the complete file.
        image_file.flush()
        command = [TESSERACT, image_file.name, "stdout", "--psm", str(psm)]
        result = subprocess.run(command, capture_output=True, timeout=60)
    if result.returncode != 0:
        detail = result.stderr.decode("utf-8", "replace")[:300]
        raise RuntimeError(f"tesseract failed (rc={result.returncode}): {detail}")
    return result.stdout.decode("utf-8", "replace")
def _psm(query: str) -> int:
try:
return int(parse_qs(query).get("psm", ["6"])[0])
except (ValueError, IndexError):
return 6
class Handler(BaseHTTPRequestHandler):
    """JSON request handler for the OCR service.

    Routes:
        GET  /health -> ``{"ok": true}``; answered before the origin check.
        GET  /ocr    -> screencap the redroid screen and OCR it.
        POST /ocr    -> OCR the image bytes sent as the request body.

    Every response is a JSON object with an ``ok`` flag; failures also carry
    an ``error`` string.
    """

    def _csrf_ok(self) -> bool:
        """Return True unless the request appears to originate from a web page.

        A request is refused when it carries an ``Origin`` header, or a
        ``Sec-Fetch-Site`` header whose value is anything other than ``none``.
        """
        # No served page → no legitimate browser Origin. The tool/curl send none.
        if self.headers.get("Origin") is not None:
            return False
        # Browsers leave Origin off no-cors GETs (e.g. <img src=...>), so the
        # Origin test alone would still let a page open while the SSH tunnel is
        # up trigger screencaps of the signed-in Android session. Fetch
        # Metadata marks those requests: only "none" (direct user navigation)
        # or a client that sends no such header at all is accepted.
        # NOTE(review): the Host header is not validated here.
        fetch_site = self.headers.get("Sec-Fetch-Site")
        return fetch_site is None or fetch_site.strip().lower() == "none"

    def _send(self, code: int, payload: dict) -> None:
        """Serialize *payload* as JSON and write it as the full HTTP response."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _ocr_payload(self, text: str) -> dict:
        """Build the success payload: raw text plus its non-blank, stripped lines."""
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
        return {"ok": True, "serial": REDROID_SERIAL, "text": text, "lines": lines}

    def do_GET(self) -> None:  # noqa: N802 (http.server API)
        """Serve ``/health`` and the live-screen ``/ocr`` endpoint."""
        parsed = urlparse(self.path)
        if parsed.path == "/health":
            self._send(200, {"ok": True})
            return
        if not self._csrf_ok():
            self._send(403, {"ok": False, "error": "forbidden origin"})
            return
        if parsed.path == "/ocr":
            try:
                self._send(200, self._ocr_payload(ocr(screencap(), _psm(parsed.query))))
            except Exception as e:  # surface the real failure to the caller
                self._send(500, {"ok": False, "error": str(e)})
            return
        self._send(404, {"ok": False, "error": "not found"})

    def do_POST(self) -> None:  # noqa: N802
        """OCR the image uploaded as the request body of ``POST /ocr``."""
        if not self._csrf_ok():
            self._send(403, {"ok": False, "error": "forbidden origin"})
            return
        parsed = urlparse(self.path)
        if parsed.path != "/ocr":
            self._send(404, {"ok": False, "error": "not found"})
            return
        # A malformed Content-Length is a client error: answer 400 instead of
        # letting int() raise out of the handler with no response sent.
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send(400, {"ok": False, "error": "invalid Content-Length"})
            return
        if length <= 0:
            self._send(400, {"ok": False, "error": "empty body — POST a PNG/JPG image"})
            return
        try:
            self._send(200, self._ocr_payload(ocr(self.rfile.read(length), _psm(parsed.query))))
        except Exception as e:  # surface the real failure to the caller
            self._send(500, {"ok": False, "error": str(e)})

    def log_message(self, *_args) -> None:  # quiet; journald captures stderr
        """Suppress http.server's per-request access logging."""
        pass
def main() -> None:
    """Print the startup banner, then serve HTTP on 127.0.0.1:PORT.

    Blocks inside ``serve_forever()``; each request is handled by ``Handler``.
    """
    banner = f"mrnumber-ocr on 127.0.0.1:{PORT} (redroid serial {REDROID_SERIAL})"
    # flush=True so the banner is emitted immediately rather than buffered.
    print(banner, flush=True)
    server = ThreadingHTTPServer(("127.0.0.1", PORT), Handler)
    server.serve_forever()


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()