From 9c631af242fec3f8f92a68fe43da88c43d0838b9 Mon Sep 17 00:00:00 2001
From: Natalie
Date: Tue, 30 Jun 2026 00:38:31 -0400
Subject: [PATCH] feat(redroid-client): add OCR + OpenAI-compatible backends for the box path, bump 0.3.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The redroid box has no Claude API, so add two HTTP backends used there:
ocr_extract() (the on-box mrnumber-ocr tesseract service → {text,lines}) and
openai_chat() (an OpenAI-compatible /v1/chat/completions LLM on a DO GPU
droplet). Plus selectors extract_backend() (ocr|vision, default vision) +
rating_llm_url()/model() so plum-dev keeps the Claude path unchanged.
Additive; @whatsapp unaffected.

Co-Authored-By: Claude Opus 4.8
---
Reviewer note: the backends.py hunk was hand-edited after export (env values
are stripped before defaulting; OCR and chat responses are shape-checked). The
hunk header and diffstat were updated to match, but the blob id on that file's
"index" line was not regenerated.

 redroid-client/pyproject.toml                 |   4 +-
 redroid-client/src/redroid_client/__init__.py |  16 ++-
 redroid-client/src/redroid_client/backends.py | 134 +++++++++++++++++++
 3 files changed, 151 insertions(+), 3 deletions(-)
 create mode 100644 redroid-client/src/redroid_client/backends.py

diff --git a/redroid-client/pyproject.toml b/redroid-client/pyproject.toml
index d6d72fa..f2d884e 100644
--- a/redroid-client/pyproject.toml
+++ b/redroid-client/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lilith-redroid-client"
-version = "0.2.0"
+version = "0.3.0"
 description = "Shared client base for the lilith Android screening tools — adb device automation over the redroid backend, batch-SDK vision extraction, and people-service (persons DB) signal recording."
 requires-python = ">=3.11"
 dependencies = []
@@ -22,4 +22,4 @@ addopts = "-v --tb=short"
 
 # Publish target: the cocotte-forge (Forgejo) PyPI registry, platform org.
 #   pip install -i http://134.199.243.61:3000/api/packages/platform/pypi/simple lilith-redroid-client
-#   twine upload --repository-url http://134.199.243.61:3000/api/packages/platform/pypi dist/*
+#   twine upload --repository-url https://pypi.ct.uvlava.com/ dist/*  # or http://134.199.243.61:8080/ for pypiserver on ct-forge
diff --git a/redroid-client/src/redroid_client/__init__.py b/redroid-client/src/redroid_client/__init__.py
index 1fd4c5f..5d65712 100644
--- a/redroid-client/src/redroid_client/__init__.py
+++ b/redroid-client/src/redroid_client/__init__.py
@@ -9,6 +9,14 @@ Drives the shared redroid Android backend over adb and records screening verdict
 
 from __future__ import annotations
 
+from .backends import (
+    extract_backend,
+    ocr_extract,
+    ocr_url,
+    openai_chat,
+    rating_llm_model,
+    rating_llm_url,
+)
 from .device import DEFAULT_DEVICE, RedroidDevice
 from .logging import json_mode, log, set_json_mode
 from .phone import clean_phone, digits_only
@@ -32,6 +40,12 @@ __all__ = [
     "json_mode",
     "clean_phone",
     "digits_only",
+    "extract_backend",
+    "ocr_extract",
+    "ocr_url",
+    "openai_chat",
+    "rating_llm_url",
+    "rating_llm_model",
     "record_people_signal",
     "people_base_url",
     "people_service_token",
@@ -44,4 +58,4 @@ __all__ = [
     "load_sdk",
 ]
 
-__version__ = "0.2.0"
+__version__ = "0.3.0"
diff --git a/redroid-client/src/redroid_client/backends.py b/redroid-client/src/redroid_client/backends.py
new file mode 100644
index 0000000..aaf0074
--- /dev/null
+++ b/redroid-client/src/redroid_client/backends.py
@@ -0,0 +1,134 @@
+"""Pluggable extraction + rating backends for the on-box (Claude-API-less) path.
+
+The redroid box has no Claude API access, so two HTTP backends replace the
+claude-code-batch-sdk there:
+
+* **OCR extraction** — the on-box ``mrnumber-ocr`` service (tesseract over the live
+  redroid screen / an uploaded PNG). :func:`ocr_extract` returns its ``{text, lines}``.
+* **OpenAI-compatible chat** — a model served on a DO GPU droplet (vLLM/ollama/TGI all
+  expose ``/v1/chat/completions``). :func:`openai_chat` returns the message content.
+
+Selection is by env so plum-dev keeps the Claude vision/rating path unchanged:
+  ``EXTRACT_BACKEND=ocr|vision``   (default ``vision``)
+  ``RATING_LLM_URL``               (set → GPU rating; unset → Claude SDK)
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any
+
+
+def extract_backend() -> str:
+    """Return the extraction backend named by ``EXTRACT_BACKEND``, lower-cased.
+
+    Unset, empty and whitespace-only values fall back to ``vision``. The value is
+    not validated against ``ocr``/``vision``.
+    """
+    return (os.environ.get("EXTRACT_BACKEND") or "").strip().lower() or "vision"
+
+
+def ocr_url() -> str:
+    """Base URL of the OCR service: ``MR_OCR_URL``, else ``http://127.0.0.1:8003``."""
+    return (os.environ.get("MR_OCR_URL") or "").strip().rstrip("/") or "http://127.0.0.1:8003"
+
+
+def rating_llm_url() -> str:
+    """Base URL of an OpenAI-compatible LLM (GPU droplet). Empty → use the Claude SDK."""
+    return (os.environ.get("RATING_LLM_URL") or "").strip().rstrip("/")
+
+
+def rating_llm_model() -> str:
+    """Model name for the rating LLM: ``RATING_LLM_MODEL``, else ``default``."""
+    return os.environ.get("RATING_LLM_MODEL") or "default"
+
+
+def _require_requests() -> Any:
+    """Import and return ``requests``; raise ``RuntimeError`` if it is not installed.
+
+    Imported lazily so the rest of the package imports without it.
+    """
+    try:
+        import requests
+    except ImportError as e:  # pragma: no cover - environment guard
+        raise RuntimeError("the 'requests' package is required for OCR/GPU backends") from e
+    return requests
+
+
+def ocr_extract(image: str | Path | bytes, *, base_url: str | None = None, psm: int = 6, timeout: int = 60) -> dict[str, Any]:
+    """OCR a screenshot via the on-box mrnumber-ocr service.
+
+    Args:
+        image: Path to a PNG file, or the raw image bytes.
+        base_url: Service base URL; defaults to :func:`ocr_url`.
+        psm: Sent as the ``psm`` query parameter.
+        timeout: Request timeout in seconds.
+
+    Returns:
+        The service payload ``{"text": str, "lines": list[str], ...}``.
+
+    Raises:
+        RuntimeError: If ``requests`` is missing, or the payload is not a JSON object
+            with a truthy ``ok``. HTTP error statuses raise from ``raise_for_status()``.
+    """
+    requests = _require_requests()
+    url = f"{(base_url or ocr_url()).rstrip('/')}/ocr"
+    data = image if isinstance(image, (bytes, bytearray)) else Path(image).read_bytes()
+    resp = requests.post(url, params={"psm": psm}, data=data,
+                         headers={"Content-Type": "application/octet-stream"}, timeout=timeout)
+    resp.raise_for_status()
+    payload = resp.json()
+    # A non-object JSON body would otherwise surface as an AttributeError on .get().
+    if not isinstance(payload, dict):
+        raise RuntimeError(f"ocr service error: unexpected payload type {type(payload).__name__}")
+    if not payload.get("ok", False):
+        raise RuntimeError(f"ocr service error: {payload.get('error', 'unknown')}")
+    return payload
+
+
+def openai_chat(*, base_url: str, model: str, system: str, user: str,
+                api_key: str | None = None, temperature: float = 0.0, timeout: int = 180) -> str:
+    """Call an OpenAI-compatible ``/v1/chat/completions`` and return the message content.
+
+    Works against vLLM / ollama / TGI on a DO GPU droplet.
+
+    Args:
+        base_url: Server base URL, without the ``/v1/chat/completions`` suffix.
+        model: Model name placed in the request body.
+        system: Content of the system message.
+        user: Content of the user message.
+        api_key: Bearer token; ``None`` falls back to ``RATING_LLM_KEY``. An empty
+            value sends no ``Authorization`` header.
+        temperature: Value of the ``temperature`` field in the request body.
+        timeout: Request timeout in seconds.
+
+    Returns:
+        The ``content`` string of the first choice's message.
+
+    Raises:
+        RuntimeError: If ``requests`` is missing, or the response lacks a string at
+            ``choices[0].message.content``. HTTP error statuses raise from ``raise_for_status()``.
+    """
+    requests = _require_requests()
+    headers = {"Content-Type": "application/json"}
+    key = api_key if api_key is not None else os.environ.get("RATING_LLM_KEY")
+    if key:
+        headers["Authorization"] = f"Bearer {key}"
+    body = {
+        "model": model,
+        "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}],
+        "temperature": temperature,
+    }
+    resp = requests.post(f"{base_url.rstrip('/')}/v1/chat/completions", json=body, headers=headers, timeout=timeout)
+    resp.raise_for_status()
+    data = resp.json()
+    # A 2xx reply is not guaranteed to have the expected shape; fail with a clear
+    # message rather than a bare KeyError/IndexError or a silent ``None`` return.
+    try:
+        content = data["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError) as e:
+        raise RuntimeError(f"unexpected chat completion response: {str(data)[:200]}") from e
+    if not isinstance(content, str):
+        raise RuntimeError("chat completion response has no text content")
+    return content