No description
| | | |
|---|---|---|
| .forgejo/workflows | ||
| src/lilith_ollama_provider | ||
| pyproject.toml | ||
| README.md | ||
lilith-ollama-provider
Ollama LLM provider client with synchronous and asynchronous support, designed for Ministral models.
Features
- Sync & Async: Both synchronous and asynchronous clients
- Streaming: Real-time token streaming support
- Chat & Generate: Both completion and chat APIs
- Model Management: List, pull, and check model status
- Health Checks: Verify Ollama availability
- Pydantic Models: Full type safety with Pydantic v2
Installation
pip install lilith-ollama-provider
Quick Start
from lilith_ollama_provider import OllamaProvider, OllamaAsyncProvider, OllamaConfig
# Synchronous client
provider = OllamaProvider(OllamaConfig(
host="http://localhost:11434",
model="ministral",
))
response = provider.generate("What is the capital of France?")
print(response.response)
# Asynchronous client (the `await` below must run inside an `async def` function)
async_provider = OllamaAsyncProvider(OllamaConfig(
host="http://localhost:11434",
model="ministral",
))
response = await async_provider.generate("What is the capital of France?")
API Reference
Configuration
from lilith_ollama_provider import OllamaConfig
config = OllamaConfig(
host="http://localhost:11434", # Ollama server URL
model="ministral", # Default model
timeout=120.0, # Request timeout
num_ctx=4096, # Context window size
temperature=0.7, # Generation temperature
top_p=0.9, # Top-p sampling
top_k=40, # Top-k sampling
)
Generate (Completion)
from lilith_ollama_provider import GenerateRequest
request = GenerateRequest(
prompt="Write a haiku about Python",
system="You are a poet.",
temperature=0.8,
)
response = provider.generate(request)
# Or simply:
response = provider.generate("Write a haiku about Python")
print(response.response)
print(response.total_duration) # Total generation time (nanoseconds, per the Ollama API)
print(response.eval_count) # Tokens generated
Generate with Streaming
for chunk in provider.generate_stream("Tell me a story"):
print(chunk.response, end="", flush=True)
if chunk.done:
print(f"\nTotal tokens: {chunk.eval_count}")
Chat
from lilith_ollama_provider import ChatMessage, ChatRequest
messages = [
ChatMessage(role="system", content="You are a helpful assistant."),
ChatMessage(role="user", content="Hello!"),
]
response = provider.chat(ChatRequest(messages=messages))
print(response.message.content)
Chat with Streaming
async for chunk in async_provider.chat_stream(messages):
if chunk.message:
print(chunk.message.content, end="", flush=True)
Model Information
# List available models
models = provider.list_models()
for model in models:
print(f"{model.name}: {model.size / 1e9:.1f}GB")
# Get specific model info
info = provider.get_model_info("ministral")
print(f"Parameters: {info.details.parameter_size}")
# Check if model exists
exists = provider.model_exists("ministral")
Health Check
health = provider.health_check()
if health.healthy:
print(f"Ollama OK, {len(health.models)} models available")
else:
print(f"Ollama error: {health.error}")
Types
Request Types
class GenerateRequest(BaseModel):
prompt: str
model: str | None = None
system: str | None = None
template: str | None = None
context: list[int] | None = None
stream: bool = False
raw: bool = False
format: str | None = None # "json" for JSON mode
options: dict | None = None
class ChatMessage(BaseModel):
role: Literal["system", "user", "assistant"]
content: str
images: list[str] | None = None # Base64 images
class ChatRequest(BaseModel):
messages: list[ChatMessage]
model: str | None = None
stream: bool = False
format: str | None = None
options: dict | None = None
Response Types
class GenerateResponse(BaseModel):
model: str
response: str
done: bool
context: list[int] | None = None
total_duration: int | None = None
load_duration: int | None = None
prompt_eval_count: int | None = None
eval_count: int | None = None
eval_duration: int | None = None
class ChatResponse(BaseModel):
model: str
message: ChatMessage
done: bool
total_duration: int | None = None
eval_count: int | None = None
class ModelInfo(BaseModel):
name: str
modified_at: str
size: int
digest: str
details: ModelDetails
class HealthStatus(BaseModel):
healthy: bool
models: list[str]
error: str | None = None
Async Usage
import asyncio
from lilith_ollama_provider import OllamaAsyncProvider, OllamaConfig
async def main():
provider = OllamaAsyncProvider(OllamaConfig())
# Check health
health = await provider.health_check()
# Generate
response = await provider.generate("Hello!")
# Stream
async for chunk in provider.generate_stream("Tell a story"):
print(chunk.response, end="")
asyncio.run(main())
Error Handling
from lilith_ollama_provider import OllamaProvider
from httpx import HTTPError
try:
response = provider.generate("Hello")
except HTTPError as e:
print(f"Connection error: {e}")
except Exception as e:
print(f"Generation error: {e}")
Dependencies
- ollama - Official Ollama Python client
- pydantic - Data validation
License
MIT