chore(@ml/auto-commit-service): 🛠 update file grouping logic and refactor related code

This commit is contained in:
Lilith 2026-01-10 10:29:03 -08:00
parent ddce7a59b4
commit 0bba6b6099
4 changed files with 149 additions and 8 deletions

View file

@ -132,6 +132,24 @@ class AutoCommitSettings(BaseServiceSettings):
description="Use model-boss to resolve model paths before starting llama-service",
)
# File grouping configuration
grouping_model: str = Field(
default="mistral-3-14b",
description="Model ID for intelligent file grouping (uses Mistral 3 14B reasoning)",
)
use_mistral_for_grouping: bool = Field(
default=True,
description="Enable Mistral-based file grouping (creates multiple logical commits per repo)",
)
max_files_per_commit: int = Field(
default=15,
description="Maximum files allowed per commit group",
)
min_groups: int = Field(
default=2,
description="Minimum number of groups to create when >X files changed (force splitting)",
)
# Logging
log_file: Path = Field(
default=Path("/tmp/auto-commit.log"),

View file

@ -157,6 +157,19 @@ async def git_add_all(repo_path: Path) -> None:
await _run_git_command("add", "-A", cwd=repo_path)
async def git_add_specific(repo_path: Path, files: list[str]) -> None:
    """Stage only the listed paths.

    An empty list is a no-op: git is not invoked at all.

    Args:
        repo_path: Repository path, handed to the git helper as ``cwd``.
        files: File paths to stage.
    """
    if files:
        # Every path goes into one `git add` call; the `--` separator marks
        # the end of options so the entries are passed as paths.
        await _run_git_command("add", "--", *files, cwd=repo_path)
async def git_commit(repo_path: Path, message: str) -> CommitResult:
"""Create a commit with the given message."""
try:

View file

@ -34,11 +34,13 @@ class LlamaCommitClient:
timeout: float = 30.0,
max_tokens: int = 100,
temperature: float = 0.2,
default_model: str = "qwen2.5-1.5b-instruct",
):
self.base_url = base_url.rstrip("/")
self.timeout = timeout
self.max_tokens = max_tokens
self.temperature = temperature
self.default_model = default_model
self._client: httpx.AsyncClient | None = None
async def _get_client(self) -> httpx.AsyncClient:
@ -117,11 +119,46 @@ class LlamaCommitClient:
prompt = build_simple_prompt(diff, repo_name)
return await self._generate(prompt)
async def _generate(self, prompt: str) -> str:
async def group_files(
    self,
    files: list[str],
    diff_summary: str,
    repo_name: str = "",
    branch: str = "main",
    model: str | None = None,
) -> str:
    """Ask the model to partition changed files into logical commit groups.

    Builds the grouping prompt and sends it together with the grouping
    system prompt, so the request is not issued under the default
    commit-message system prompt that ``_generate`` falls back to.

    Args:
        files: List of changed file paths.
        diff_summary: Summary of the diff.
        repo_name: Repository name.
        branch: Current branch.
        model: Model ID forwarded to ``_generate``. When ``None``, the
            choice of model is left to ``_generate``.

    Returns:
        The raw text produced by the model. It is expected to be a JSON
        array of groups, but it is neither parsed nor validated here.
    """
    from .prompts import GROUPING_SYSTEM_PROMPT, build_grouping_prompt

    prompt = build_grouping_prompt(files, diff_summary, repo_name, branch)
    # The JSON answer lists every file plus a reason per group, so it needs
    # a larger token budget than the client's default max_tokens.
    return await self._generate(
        prompt,
        model=model,
        max_tokens=2000,
        system_prompt=GROUPING_SYSTEM_PROMPT,
    )
async def _generate(
self,
prompt: str,
model: str | None = None,
max_tokens: int | None = None,
system_prompt: str | None = None,
) -> str:
"""Internal method to call llama-service.
Args:
prompt: The user prompt
model: Model to use (defaults to self.default_model)
max_tokens: Max tokens for generation (defaults to self.max_tokens)
system_prompt: System prompt (defaults to COMMIT_SYSTEM_PROMPT)
Returns:
Generated text
@ -135,18 +172,24 @@ class LlamaCommitClient:
# Build messages array with system message (OpenAI format)
messages = [
{"role": "system", "content": COMMIT_SYSTEM_PROMPT},
{"role": "system", "content": system_prompt or COMMIT_SYSTEM_PROMPT},
{"role": "user", "content": prompt},
]
request_body = {
"messages": messages,
"max_tokens": max_tokens or self.max_tokens,
"temperature": self.temperature,
"stream": False,
}
# Add model parameter if specified
if model:
request_body["model"] = model
response = await client.post(
f"{self.base_url}/chat",
json={
"messages": messages,
"max_tokens": self.max_tokens,
"temperature": self.temperature,
"stream": False,
},
json=request_body,
)
if response.status_code == 503:

View file

@ -114,3 +114,70 @@ Use emoji prefix (✨ feat, 🔧 chore, ♻️ refactor, 🐛 fix, 📝 docs,
Keep under 50 chars. Imperative mood. No period.
Output ONLY the commit message, nothing else."""
# System prompt for file-grouping requests: frames the model as an engineer
# splitting changed files into focused commits, lists the grouping principles,
# and requires the answer to be JSON only.
GROUPING_SYSTEM_PROMPT = """You are an expert software engineer analyzing code changes to group files into logical commits.
Your goal is to create focused, atomic commits that group related changes together while separating unrelated concerns.
Follow these principles:
- Group files that implement a single feature or fix together
- Keep tests with their implementation
- Separate configuration from feature code
- Separate documentation updates
- Aim for 3-7 groups (not too granular, not too coarse)
- Each group should tell a coherent story
Output ONLY valid JSON, nothing else."""
def build_grouping_prompt(
files: list[str],
diff_summary: str,
repo_name: str = "",
branch: str = "main",
) -> str:
"""Build prompt for Mistral 3 14B file grouping analysis.
Args:
files: List of changed file paths
diff_summary: Summary of the diff
repo_name: Repository name
branch: Current branch
Returns:
Prompt for file grouping
"""
file_list = "\n".join(f"- {f}" for f in files)
return f"""Analyze these uncommitted changes and group the files into logical commits.
Repository: {repo_name or 'unknown'}
Branch: {branch}
Changed files ({len(files)} total):
{file_list}
Diff summary:
{diff_summary[:1500]}
Rules:
- Group related changes together (e.g., feature implementation + tests)
- Separate unrelated concerns (config vs. feature code)
- Keep commits focused and atomic
- Aim for 3-7 groups (not too granular, not too coarse)
- Each group should have a clear purpose
Output JSON format:
[
{{
"files": ["path/to/file1.ts", "path/to/file2.test.ts"],
"reasoning": "Feature: Add user authentication"
}},
{{
"files": ["package.json", "package-lock.json"],
"reasoning": "Dependencies: Update packages"
}}
]
Output ONLY the JSON array, nothing else."""