133 lines
4.9 KiB
Python
133 lines
4.9 KiB
Python
"""Tests for CommitDaemon and MultiModelLlamaClient queue integration.
|
|
|
|
Verifies that auto-commit-service sends batch priority, stay_warm=0,
|
|
and cooldown=60 to the model-boss coordinator via InferenceClient.
|
|
"""
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from auto_commit_service.config import AutoCommitSettings
|
|
|
|
|
|
@pytest.fixture
def settings():
    """Provide a small ``AutoCommitSettings`` instance for the tests below.

    The instance is built with ``enabled=False``, a one-second cycle
    interval, a 30-second LLM timeout and fixed reasoning/instruct model ids.
    """
    overrides = {
        "service_name": "test-daemon",
        "reasoning_model_id": "ministral-14b-reasoning",
        "instruct_model_id": "ministral-3b-instruct",
        "llm_timeout": 30.0,
        "cycle_interval_seconds": 1,
        "enabled": False,
    }
    return AutoCommitSettings(**overrides)
|
|
|
|
|
|
class TestMultiModelClientQueueParams:
    """Checks on the client id and request body used by MultiModelLlamaClient."""

    def test_default_client_id(self) -> None:
        """Without arguments, both the wrapper and its inner client use the default id."""
        from auto_commit_service.llm.multi_model_client import MultiModelLlamaClient

        expected_id = "auto-commit-service"
        llm = MultiModelLlamaClient()

        assert llm._client_id == expected_id
        assert llm._client._client_id == expected_id

    def test_custom_client_id(self) -> None:
        """An explicit ``client_id`` is stored on the wrapper and on its inner client."""
        from auto_commit_service.llm.multi_model_client import MultiModelLlamaClient

        expected_id = "test-service"
        llm = MultiModelLlamaClient(client_id=expected_id)

        assert llm._client_id == expected_id
        assert llm._client._client_id == expected_id

    @pytest.mark.asyncio
    async def test_chat_passes_queue_params(self) -> None:
        """_chat() sends the task and queue fields in one POST to the coordinator.

        ``httpx.AsyncClient`` is swapped for a stub that records the URL and
        JSON body of the single ``post`` call, so no network traffic occurs.
        """
        from auto_commit_service.llm.multi_model_client import MultiModelLlamaClient

        llm = MultiModelLlamaClient(client_id="test-svc")

        # Replace the inner client with a mock carrying only the attributes
        # this test configures.
        inner = MagicMock()
        inner._coordinator_url = "http://coord.test"
        inner._default_priority = "normal"
        llm._client = inner

        # Filled in by the stub's post(); inspected after the call returns.
        recorded: dict = {}

        class _StubResponse:
            """Canned successful chat-completion response."""

            def raise_for_status(self) -> None:
                return None

            def json(self) -> dict:
                return {"choices": [{"message": {"content": "response text"}}]}

        class _StubAsyncClient:
            """Async-context-manager stand-in for ``httpx.AsyncClient``."""

            def __init__(self, *_args, **_kwargs) -> None:
                # Constructor arguments are accepted and ignored.
                pass

            async def __aenter__(self):
                return self

            async def __aexit__(self, *_args) -> None:
                return None

            async def post(self, url: str, json: dict) -> _StubResponse:
                recorded.update(url=url, body=json)
                return _StubResponse()

        import httpx

        task_name = "summarization.short"
        with patch.object(httpx, "AsyncClient", _StubAsyncClient):
            reply = await llm._chat(
                task_name,
                [{"role": "user", "content": "test"}],
                max_tokens=100,
            )

        assert reply == "response text"
        assert recorded["url"] == "http://coord.test/v1/chat/completions"

        payload = recorded["body"]
        assert payload["task"] == "summarization.short"
        assert payload["messages"] == [{"role": "user", "content": "test"}]
        assert payload["max_tokens"] == 100
        assert payload["temperature"] == llm._temperature
        assert payload["x_client_id"] == "test-svc"
        assert payload["x_keep_alive"] == 300
        # The request must not name a model.
        assert "model" not in payload
|
|
|
|
|
|
class TestDaemonSimplifiedLoop:
    """CommitDaemon should not expose model-lifecycle attributes or methods."""

    def _make_daemon(self, settings):
        """Return a CommitDaemon built from *settings* and a mocked LLM client.

        The mock reports itself as available, has an awaitable
        ``ensure_services``, and copies both model ids from *settings*.
        """
        from auto_commit_service.scheduler.daemon import CommitDaemon

        llm = MagicMock()
        llm.is_available = AsyncMock(return_value=True)
        llm.ensure_services = AsyncMock()
        llm.reasoning_model_id = settings.reasoning_model_id
        llm.instruct_model_id = settings.instruct_model_id

        return CommitDaemon(settings=settings, llm_client=llm)

    def test_no_lifecycle_attrs(self, settings) -> None:
        """The daemon has neither _models_loaded_since nor _models_released_at."""
        daemon = self._make_daemon(settings)
        for attr_name in ("_models_loaded_since", "_models_released_at"):
            assert not hasattr(daemon, attr_name)

    def test_no_lifecycle_methods(self, settings) -> None:
        """The daemon has neither _should_force_release nor _is_in_cooldown."""
        daemon = self._make_daemon(settings)
        for method_name in ("_should_force_release", "_is_in_cooldown"):
            assert not hasattr(daemon, method_name)
|
|
|
|
|
|
class TestConfigRemovedFields:
    """AutoCommitSettings must not carry the two removed model-timing fields."""

    def test_no_model_max_alive_seconds(self) -> None:
        """A freshly built settings object has no ``model_max_alive_seconds``."""
        assert not hasattr(
            AutoCommitSettings(service_name="test"),
            "model_max_alive_seconds",
        )

    def test_no_model_cooldown_seconds(self) -> None:
        """A freshly built settings object has no ``model_cooldown_seconds``."""
        assert not hasattr(
            AutoCommitSettings(service_name="test"),
            "model_cooldown_seconds",
        )
|