feat(redroid-client): add OCR + OpenAI-compatible backends for the box path, bump 0.3.0
Some checks are pending
Build and Publish (lilith packages) / build-and-publish (lix-build) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (lix-configs) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (mcp-common) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (redroid-client) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (redroid-mcp) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (ui-fab) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (ui-icons) (push) Waiting to run
Build and Publish (lilith packages) / build-and-publish (ui-zname) (push) Waiting to run

The redroid box has no Claude API, so add two HTTP backends used there: ocr_extract()
(the on-box mrnumber-ocr tesseract service → {text,lines}) and openai_chat() (an
OpenAI-compatible /v1/chat/completions LLM on a DO GPU droplet). Plus selectors
extract_backend() (ocr|vision, default vision) + rating_llm_url()/model() so plum-dev
keeps the Claude path unchanged. Additive; @whatsapp unaffected.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Natalie 2026-06-30 00:38:31 -04:00
parent e7f4df8d3f
commit 9c631af242
3 changed files with 98 additions and 3 deletions

View file

@ -1,6 +1,6 @@
[project]
name = "lilith-redroid-client"
version = "0.2.0"
version = "0.3.0"
description = "Shared client base for the lilith Android screening tools — adb device automation over the redroid backend, batch-SDK vision extraction, and people-service (persons DB) signal recording."
requires-python = ">=3.11"
dependencies = []
@ -22,4 +22,4 @@ addopts = "-v --tb=short"
# Publish target: the cocotte-forge (Forgejo) PyPI registry, platform org.
# pip install -i http://134.199.243.61:3000/api/packages/platform/pypi/simple lilith-redroid-client
# twine upload --repository-url http://134.199.243.61:3000/api/packages/platform/pypi dist/*
# twine upload --repository-url https://pypi.ct.uvlava.com/ dist/* # or http://134.199.243.61:8080/ for pypiserver on ct-forge

View file

@ -9,6 +9,14 @@ Drives the shared redroid Android backend over adb and records screening verdict
from __future__ import annotations
from .backends import (
extract_backend,
ocr_extract,
ocr_url,
openai_chat,
rating_llm_model,
rating_llm_url,
)
from .device import DEFAULT_DEVICE, RedroidDevice
from .logging import json_mode, log, set_json_mode
from .phone import clean_phone, digits_only
@ -32,6 +40,12 @@ __all__ = [
"json_mode",
"clean_phone",
"digits_only",
"extract_backend",
"ocr_extract",
"ocr_url",
"openai_chat",
"rating_llm_url",
"rating_llm_model",
"record_people_signal",
"people_base_url",
"people_service_token",
@ -44,4 +58,4 @@ __all__ = [
"load_sdk",
]
__version__ = "0.2.0"
__version__ = "0.3.0"

View file

@ -0,0 +1,81 @@
"""Pluggable extraction + rating backends for the on-box (Claude-API-less) path.
The redroid box has no Claude API access, so two HTTP backends replace the
claude-code-batch-sdk there:
* **OCR extraction** — the on-box ``mrnumber-ocr`` service (tesseract over the live
redroid screen / an uploaded PNG). :func:`ocr_extract` returns its ``{text, lines}`` payload.
* **OpenAI-compatible chat** — a model served on a DO GPU droplet (vLLM/ollama/TGI all
expose ``/v1/chat/completions``). :func:`openai_chat` returns the message content.
Selection is by environment variable, so plum-dev keeps the Claude vision/rating path unchanged:
``EXTRACT_BACKEND=ocr|vision`` (default ``vision``)
``RATING_LLM_URL`` (set → GPU rating; unset → Claude SDK)
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Any
def extract_backend() -> str:
    """Return the extraction backend selected by ``EXTRACT_BACKEND``.

    The value is trimmed and lower-cased. An unset, empty, or whitespace-only
    variable falls back to ``"vision"``. Any other value is returned as-is
    (after normalisation); it is not validated against ``ocr``/``vision`` here.
    """
    # Normalise before applying the default so that a blank-but-set variable
    # (e.g. ``EXTRACT_BACKEND=" "``) does not yield an empty backend name.
    selected = (os.environ.get("EXTRACT_BACKEND") or "").strip().lower()
    return selected or "vision"
def ocr_url() -> str:
    """Return the OCR service base URL without trailing slashes.

    Taken from ``MR_OCR_URL``; when that is unset or empty the local default
    ``http://127.0.0.1:8003`` is used.
    """
    configured = os.environ.get("MR_OCR_URL")
    base = configured if configured else "http://127.0.0.1:8003"
    return base.rstrip("/")
def rating_llm_url() -> str:
    """Return the base URL of an OpenAI-compatible LLM (GPU droplet).

    Reads ``RATING_LLM_URL``, trims surrounding whitespace, then drops any
    trailing ``/``. An unset, empty, or whitespace-only variable yields ``""``.
    Empty → use the Claude SDK.
    """
    # Strip whitespace first: a blank-but-set variable must still read as
    # "not configured", and trailing slashes hidden behind whitespace must go.
    return (os.environ.get("RATING_LLM_URL") or "").strip().rstrip("/")
def rating_llm_model() -> str:
    """Return the model name from ``RATING_LLM_MODEL``, or ``"default"`` when unset or empty."""
    model_name = os.environ.get("RATING_LLM_MODEL", "")
    if model_name:
        return model_name
    return "default"
def _require_requests() -> Any:
    """Import ``requests`` on demand and return the module.

    The import is deferred to call time so that merely importing this module
    does not need ``requests``.

    Raises:
        RuntimeError: if ``requests`` cannot be imported (chained from the
            original ``ImportError``).
    """
    try:
        import requests as requests_module
    except ImportError as exc:  # pragma: no cover - environment guard
        raise RuntimeError("the 'requests' package is required for OCR/GPU backends") from exc
    else:
        return requests_module
def ocr_extract(image: str | Path | bytes, *, base_url: str | None = None, psm: int = 6, timeout: int = 60) -> dict[str, Any]:
    """OCR a screenshot via the on-box mrnumber-ocr service.

    Args:
        image: Path to a PNG file (``str`` or ``Path``), or the raw image data
            as a bytes-like object (``bytes``, ``bytearray``, ``memoryview``).
        base_url: Service base URL; defaults to :func:`ocr_url`. Trailing
            slashes are ignored.
        psm: Forwarded as the ``psm`` query parameter (presumably tesseract's
            page-segmentation mode — confirm against the service).
        timeout: Request timeout in seconds, passed to ``requests.post``.

    Returns:
        The service payload ``{"text": str, "lines": list[str], ...}``.

    Raises:
        RuntimeError: if ``requests`` is unavailable, the response body is not
            a JSON object, or the payload's ``ok`` flag is missing or false.
        OSError: if ``image`` is a path that cannot be read.
        requests.HTTPError: on a non-2xx HTTP status.
    """
    requests = _require_requests()
    url = f"{(base_url or ocr_url()).rstrip('/')}/ocr"

    # Normalise every bytes-like input to immutable bytes so it is posted as a
    # plain request body; anything else is treated as a filesystem path.
    if isinstance(image, (bytes, bytearray, memoryview)):
        data = bytes(image)
    else:
        data = Path(image).read_bytes()

    resp = requests.post(
        url,
        params={"psm": psm},
        data=data,
        headers={"Content-Type": "application/octet-stream"},
        timeout=timeout,
    )
    resp.raise_for_status()
    payload = resp.json()

    # A JSON array/string/null would otherwise crash on ``.get`` with an
    # AttributeError; report it like any other service failure.
    if not isinstance(payload, dict):
        raise RuntimeError(f"ocr service error: unexpected response type {type(payload).__name__}")
    if not payload.get("ok", False):
        raise RuntimeError(f"ocr service error: {payload.get('error', 'unknown')}")
    return payload
def openai_chat(*, base_url: str, model: str, system: str, user: str,
                api_key: str | None = None, temperature: float = 0.0, timeout: int = 180) -> str:
    """Call an OpenAI-compatible ``/v1/chat/completions`` and return the message content.

    Works against vLLM / ollama / TGI on a DO GPU droplet.

    Args:
        base_url: Server base URL *without* the ``/v1`` suffix; trailing
            slashes are ignored.
        model: Model name sent in the request body.
        system: Content of the system message.
        user: Content of the user message.
        api_key: Bearer token. ``None`` falls back to the ``RATING_LLM_KEY``
            environment variable; an empty value sends no ``Authorization``
            header.
        temperature: Sampling temperature sent in the request body.
        timeout: Request timeout in seconds, passed to ``requests.post``.

    Returns:
        The ``content`` string of the first choice's message.

    Raises:
        RuntimeError: if ``requests`` is unavailable, or the response does not
            contain a string at ``choices[0].message.content``.
        requests.HTTPError: on a non-2xx HTTP status.
    """
    requests = _require_requests()

    headers = {"Content-Type": "application/json"}
    # An explicit api_key (even "") wins over the environment; only a truthy
    # key produces an Authorization header.
    key = api_key if api_key is not None else os.environ.get("RATING_LLM_KEY")
    if key:
        headers["Authorization"] = f"Bearer {key}"

    body = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": temperature,
    }
    resp = requests.post(
        f"{base_url.rstrip('/')}/v1/chat/completions",
        json=body,
        headers=headers,
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    # Servers can answer 200 with an error object or an empty ``choices``
    # list; surface that as one clear error instead of a bare lookup failure.
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(
            "unexpected chat completion response: missing choices[0].message.content"
        ) from exc
    # ``content`` may be null in the chat-completions schema; the declared
    # return type is ``str``, so fail here rather than hand back None.
    if not isinstance(content, str):
        raise RuntimeError(
            f"unexpected chat completion response: content is {type(content).__name__}, expected str"
        )
    return content