chore(@ml/auto-commit-service): 🛠 update file grouping logic and refactor related code
This commit is contained in:
parent
ddce7a59b4
commit
0bba6b6099
4 changed files with 149 additions and 8 deletions
|
|
@ -132,6 +132,24 @@ class AutoCommitSettings(BaseServiceSettings):
|
|||
description="Use model-boss to resolve model paths before starting llama-service",
|
||||
)
|
||||
|
||||
# File grouping configuration
|
||||
grouping_model: str = Field(
|
||||
default="mistral-3-14b",
|
||||
description="Model ID for intelligent file grouping (uses Mistral 3 14B reasoning)",
|
||||
)
|
||||
use_mistral_for_grouping: bool = Field(
|
||||
default=True,
|
||||
description="Enable Mistral-based file grouping (creates multiple logical commits per repo)",
|
||||
)
|
||||
max_files_per_commit: int = Field(
|
||||
default=15,
|
||||
description="Maximum files allowed per commit group",
|
||||
)
|
||||
min_groups: int = Field(
|
||||
default=2,
|
||||
description="Minimum number of groups to create when >X files changed (force splitting)",
|
||||
)
|
||||
|
||||
# Logging
|
||||
log_file: Path = Field(
|
||||
default=Path("/tmp/auto-commit.log"),
|
||||
|
|
|
|||
|
|
@ -157,6 +157,19 @@ async def git_add_all(repo_path: Path) -> None:
|
|||
await _run_git_command("add", "-A", cwd=repo_path)
|
||||
|
||||
|
||||
async def git_add_specific(repo_path: Path, files: list[str]) -> None:
    """Stage only the given paths, leaving every other change unstaged.

    Args:
        repo_path: Path to the repository the git command runs in
        files: Paths to stage; an empty list makes this a no-op
    """
    if files:
        # One invocation covers every path; "--" separates the paths from
        # the options so git does not read a path as a flag.
        await _run_git_command("add", "--", *files, cwd=repo_path)
|
||||
|
||||
|
||||
async def git_commit(repo_path: Path, message: str) -> CommitResult:
|
||||
"""Create a commit with the given message."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -34,11 +34,13 @@ class LlamaCommitClient:
|
|||
timeout: float = 30.0,
|
||||
max_tokens: int = 100,
|
||||
temperature: float = 0.2,
|
||||
default_model: str = "qwen2.5-1.5b-instruct",
|
||||
):
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.timeout = timeout
|
||||
self.max_tokens = max_tokens
|
||||
self.temperature = temperature
|
||||
self.default_model = default_model
|
||||
self._client: httpx.AsyncClient | None = None
|
||||
|
||||
async def _get_client(self) -> httpx.AsyncClient:
|
||||
|
|
@ -117,11 +119,46 @@ class LlamaCommitClient:
|
|||
prompt = build_simple_prompt(diff, repo_name)
|
||||
return await self._generate(prompt)
|
||||
|
||||
async def _generate(self, prompt: str) -> str:
|
||||
async def group_files(
    self,
    files: list[str],
    diff_summary: str,
    repo_name: str = "",
    branch: str = "main",
    model: str | None = None,
) -> str:
    """Ask the model to group changed files into logical commits.

    Args:
        files: List of changed file paths
        diff_summary: Summary of the diff
        repo_name: Repository name
        branch: Current branch
        model: Model to use. ``None`` is forwarded unchanged to
            ``_generate``, which applies its own default handling.

    Returns:
        The raw text produced by the model. It is expected to be a JSON
        array of groups (files plus reasoning) but is not parsed or
        validated here.
    """
    # Function-scope import, as in the original implementation.
    from .prompts import GROUPING_SYSTEM_PROMPT, build_grouping_prompt

    prompt = build_grouping_prompt(files, diff_summary, repo_name, branch)
    # Pass the grouping system prompt explicitly: when system_prompt is
    # omitted, _generate falls back to the commit-message system prompt,
    # which contradicts the "output only JSON" instruction in the user prompt.
    # A larger max_tokens leaves room for the JSON output.
    return await self._generate(
        prompt,
        model=model,
        max_tokens=2000,
        system_prompt=GROUPING_SYSTEM_PROMPT,
    )
|
||||
|
||||
async def _generate(
|
||||
self,
|
||||
prompt: str,
|
||||
model: str | None = None,
|
||||
max_tokens: int | None = None,
|
||||
system_prompt: str | None = None,
|
||||
) -> str:
|
||||
"""Internal method to call llama-service.
|
||||
|
||||
Args:
|
||||
prompt: The user prompt
|
||||
model: Model to use (defaults to self.default_model)
|
||||
max_tokens: Max tokens for generation (defaults to self.max_tokens)
|
||||
system_prompt: System prompt (defaults to COMMIT_SYSTEM_PROMPT)
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
|
|
@ -135,18 +172,24 @@ class LlamaCommitClient:
|
|||
|
||||
# Build messages array with system message (OpenAI format)
|
||||
messages = [
|
||||
{"role": "system", "content": COMMIT_SYSTEM_PROMPT},
|
||||
{"role": "system", "content": system_prompt or COMMIT_SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt},
|
||||
]
|
||||
|
||||
request_body = {
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or self.max_tokens,
|
||||
"temperature": self.temperature,
|
||||
"stream": False,
|
||||
}
|
||||
|
||||
# Add model parameter if specified
|
||||
if model:
|
||||
request_body["model"] = model
|
||||
|
||||
response = await client.post(
|
||||
f"{self.base_url}/chat",
|
||||
json={
|
||||
"messages": messages,
|
||||
"max_tokens": self.max_tokens,
|
||||
"temperature": self.temperature,
|
||||
"stream": False,
|
||||
},
|
||||
json=request_body,
|
||||
)
|
||||
|
||||
if response.status_code == 503:
|
||||
|
|
|
|||
|
|
@ -114,3 +114,70 @@ Use emoji prefix (✨ feat, 🔧 chore, ♻️ refactor, 🐛 fix, 📝 docs,
|
|||
Keep under 50 chars. Imperative mood. No period.
|
||||
|
||||
Output ONLY the commit message, nothing else."""
|
||||
|
||||
|
||||
GROUPING_SYSTEM_PROMPT = """You are an expert software engineer analyzing code changes to group files into logical commits.
|
||||
|
||||
Your goal is to create focused, atomic commits that group related changes together while separating unrelated concerns.
|
||||
|
||||
Follow these principles:
|
||||
- Group files that implement a single feature or fix together
|
||||
- Keep tests with their implementation
|
||||
- Separate configuration from feature code
|
||||
- Separate documentation updates
|
||||
- Aim for 3-7 groups (not too granular, not too coarse)
|
||||
- Each group should tell a coherent story
|
||||
|
||||
Output ONLY valid JSON, nothing else."""
|
||||
|
||||
|
||||
def build_grouping_prompt(
    files: list[str],
    diff_summary: str,
    repo_name: str = "",
    branch: str = "main",
) -> str:
    """Assemble the user prompt that asks the model to group changed files.

    Args:
        files: List of changed file paths
        diff_summary: Summary of the diff; only its first 1500 characters
            are included in the prompt
        repo_name: Repository name; rendered as "unknown" when empty
        branch: Current branch

    Returns:
        Prompt for file grouping
    """
    bullet_lines = [f"- {path}" for path in files]
    repo_label = repo_name if repo_name else "unknown"
    clipped_summary = diff_summary[:1500]

    # Each entry is one line of the final prompt; "" entries are blank lines.
    sections = [
        "Analyze these uncommitted changes and group the files into logical commits.",
        "",
        f"Repository: {repo_label}",
        f"Branch: {branch}",
        "",
        f"Changed files ({len(files)} total):",
        "\n".join(bullet_lines),
        "",
        "Diff summary:",
        clipped_summary,
        "",
        "Rules:",
        "- Group related changes together (e.g., feature implementation + tests)",
        "- Separate unrelated concerns (config vs. feature code)",
        "- Keep commits focused and atomic",
        "- Aim for 3-7 groups (not too granular, not too coarse)",
        "- Each group should have a clear purpose",
        "",
        "Output JSON format:",
        "[",
        "{",
        '"files": ["path/to/file1.ts", "path/to/file2.test.ts"],',
        '"reasoning": "Feature: Add user authentication"',
        "},",
        "{",
        '"files": ["package.json", "package-lock.json"],',
        '"reasoning": "Dependencies: Update packages"',
        "}",
        "]",
        "",
        "Output ONLY the JSON array, nothing else.",
    ]
    return "\n".join(sections)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue