From 9c631af242fec3f8f92a68fe43da88c43d0838b9 Mon Sep 17 00:00:00 2001
From: Natalie <natalie@lilithuwu.com>
Date: Tue, 30 Jun 2026 00:38:31 -0400
Subject: [PATCH] feat(redroid-client): add OCR + OpenAI-compatible backends
 for the box path, bump 0.3.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The redroid box has no Claude API, so add two HTTP backends for use there: ocr_extract()
(posts to the on-box mrnumber-ocr tesseract service → {text,lines}) and openai_chat() (calls
an OpenAI-compatible /v1/chat/completions LLM on a DO GPU droplet). Also add the env-driven
selectors extract_backend() (ocr|vision, default vision), ocr_url(), rating_llm_url() and
rating_llm_model(), so plum-dev keeps the Claude path unchanged. Additive; @whatsapp unaffected.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 redroid-client/pyproject.toml                 |  4 +-
 redroid-client/src/redroid_client/__init__.py | 16 +++-
 redroid-client/src/redroid_client/backends.py | 81 +++++++++++++++++++
 3 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 redroid-client/src/redroid_client/backends.py

diff --git a/redroid-client/pyproject.toml b/redroid-client/pyproject.toml
index d6d72fa..f2d884e 100644
--- a/redroid-client/pyproject.toml
+++ b/redroid-client/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lilith-redroid-client"
-version = "0.2.0"
+version = "0.3.0"
 description = "Shared client base for the lilith Android screening tools — adb device automation over the redroid backend, batch-SDK vision extraction, and people-service (persons DB) signal recording."
 requires-python = ">=3.11"
 dependencies = []
@@ -22,4 +22,4 @@ addopts = "-v --tb=short"
 
 # Publish target: the cocotte-forge (Forgejo) PyPI registry, platform org.
 #   pip install -i http://134.199.243.61:3000/api/packages/platform/pypi/simple lilith-redroid-client
-#   twine upload --repository-url http://134.199.243.61:3000/api/packages/platform/pypi dist/*
+#   twine upload --repository-url https://pypi.ct.uvlava.com/ dist/*   # or http://134.199.243.61:8080/ for pypiserver on ct-forge
diff --git a/redroid-client/src/redroid_client/__init__.py b/redroid-client/src/redroid_client/__init__.py
index 1fd4c5f..5d65712 100644
--- a/redroid-client/src/redroid_client/__init__.py
+++ b/redroid-client/src/redroid_client/__init__.py
@@ -9,6 +9,14 @@ Drives the shared redroid Android backend over adb and records screening verdict
 
 from __future__ import annotations
 
+from .backends import (
+    extract_backend,
+    ocr_extract,
+    ocr_url,
+    openai_chat,
+    rating_llm_model,
+    rating_llm_url,
+)
 from .device import DEFAULT_DEVICE, RedroidDevice
 from .logging import json_mode, log, set_json_mode
 from .phone import clean_phone, digits_only
@@ -32,6 +40,12 @@ __all__ = [
     "json_mode",
     "clean_phone",
     "digits_only",
+    "extract_backend",
+    "ocr_extract",
+    "ocr_url",
+    "openai_chat",
+    "rating_llm_url",
+    "rating_llm_model",
     "record_people_signal",
     "people_base_url",
     "people_service_token",
@@ -44,4 +58,4 @@ __all__ = [
     "load_sdk",
 ]
 
-__version__ = "0.2.0"
+__version__ = "0.3.0"
diff --git a/redroid-client/src/redroid_client/backends.py b/redroid-client/src/redroid_client/backends.py
new file mode 100644
index 0000000..aaf0074
--- /dev/null
+++ b/redroid-client/src/redroid_client/backends.py
@@ -0,0 +1,81 @@
+"""Pluggable extraction + rating backends for the on-box (Claude-API-less) path.
+
+The redroid box has no Claude API access, so two HTTP backends replace the
+claude-code-batch-sdk there:
+
+* **OCR extraction** — the on-box ``mrnumber-ocr`` service (tesseract over the live
+  redroid screen / an uploaded PNG). :func:`ocr_extract` returns its ``{text, lines}``.
+* **OpenAI-compatible chat** — a model served on a DO GPU droplet (vLLM/ollama/TGI all
+  expose ``/v1/chat/completions``). :func:`openai_chat` returns the message content.
+
+Selection is by env so plum-dev keeps the Claude vision/rating path unchanged:
+  ``EXTRACT_BACKEND=ocr|vision``  (default ``vision``)
+  ``RATING_LLM_URL``             (set → GPU rating; unset → Claude SDK)
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any
+
+
+def extract_backend() -> str:
+    """Return the lowercased ``EXTRACT_BACKEND`` env value; unset, empty or blank → ``vision``."""
+    return (os.environ.get("EXTRACT_BACKEND") or "").strip().lower() or "vision"
+
+
+def ocr_url() -> str:  # MR_OCR_URL minus surrounding whitespace / trailing slashes; blank → local default
+    return (os.environ.get("MR_OCR_URL") or "").strip().rstrip("/") or "http://127.0.0.1:8003"
+
+
+def rating_llm_url() -> str:
+    """Base URL of an OpenAI-compatible LLM (GPU droplet). Empty or blank → use the Claude SDK."""
+    return (os.environ.get("RATING_LLM_URL") or "").strip().rstrip("/")
+
+
+def rating_llm_model() -> str:  # RATING_LLM_MODEL env value, or "default" when unset/empty
+    return os.getenv("RATING_LLM_MODEL") or "default"
+
+
+def _require_requests() -> Any:  # imported lazily, at call time, by the HTTP backends below
+    try:
+        import requests as http
+    except ImportError as exc:  # pragma: no cover - environment guard
+        raise RuntimeError("the 'requests' package is required for OCR/GPU backends") from exc
+    return http
+
+
+def ocr_extract(image: str | Path | bytes, *, base_url: str | None = None, psm: int = 6, timeout: int = 60) -> dict[str, Any]:
+    """POST a PNG (a file path, or the raw bytes) to the mrnumber-ocr ``/ocr`` endpoint and return
+    its JSON payload; raises ``RuntimeError`` when the payload's ``ok`` flag is missing or falsy."""
+    http = _require_requests()
+    endpoint = (base_url or ocr_url()).rstrip("/") + "/ocr"
+    png = image if isinstance(image, (bytes, bytearray)) else Path(image).read_bytes()
+    octet_stream = {"Content-Type": "application/octet-stream"}
+    reply = http.post(endpoint, params={"psm": psm}, data=png, headers=octet_stream, timeout=timeout)
+    reply.raise_for_status()
+    result = reply.json()
+    if result.get("ok", False):
+        return result
+    raise RuntimeError(f"ocr service error: {result.get('error', 'unknown')}")
+
+
+def openai_chat(*, base_url: str, model: str, system: str, user: str,
+                api_key: str | None = None, temperature: float = 0.0, timeout: int = 180) -> str:
+    """POST a system+user prompt to an OpenAI-compatible ``/v1/chat/completions`` endpoint and
+    return the first choice's message content. ``base_url`` may be given with or without a
+    trailing ``/v1``. ``api_key`` falls back to ``RATING_LLM_KEY``; a Bearer header is sent only
+    when the resulting key is non-empty. Raises on HTTP error statuses (``raise_for_status``)."""
+    requests = _require_requests()
+    headers = {"Content-Type": "application/json"}
+    key = api_key if api_key is not None else os.environ.get("RATING_LLM_KEY")
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
+    body = {"model": model, "messages": messages, "temperature": temperature}
+    # OpenAI-style base URLs often already end in /v1; drop it so the path is not doubled.
+    root = base_url.rstrip("/").removesuffix("/v1")
+    resp = requests.post(f"{root}/v1/chat/completions", json=body, headers=headers, timeout=timeout)
+    resp.raise_for_status()
+    return resp.json()["choices"][0]["message"]["content"]