diff --git a/src/auto_commit_service/config.py b/src/auto_commit_service/config.py
index 20f5f85..278ebd2 100644
--- a/src/auto_commit_service/config.py
+++ b/src/auto_commit_service/config.py
@@ -132,6 +132,27 @@ class AutoCommitSettings(BaseServiceSettings):
         description="Use model-boss to resolve model paths before starting llama-service",
     )
 
+    # File grouping configuration
+    grouping_model: str = Field(
+        default="mistral-3-14b",
+        description="Model ID for intelligent file grouping (uses Mistral 3 14B reasoning)",
+    )
+    use_mistral_for_grouping: bool = Field(
+        default=True,
+        description="Enable Mistral-based file grouping (creates multiple logical commits per repo)",
+    )
+    # Both limits must be positive; zero or less cannot describe a usable grouping
+    max_files_per_commit: int = Field(
+        default=15,
+        ge=1,
+        description="Maximum files allowed per commit group",
+    )
+    min_groups: int = Field(
+        default=2,
+        ge=1,
+        description="Minimum number of groups to create when a change set is large enough to force splitting",
+    )
+
     # Logging
     log_file: Path = Field(
         default=Path("/tmp/auto-commit.log"),
diff --git a/src/auto_commit_service/git/operations.py b/src/auto_commit_service/git/operations.py
index 702cbfc..d2cdda4 100644
--- a/src/auto_commit_service/git/operations.py
+++ b/src/auto_commit_service/git/operations.py
@@ -157,6 +157,19 @@ async def git_add_all(repo_path: Path) -> None:
     await _run_git_command("add", "-A", cwd=repo_path)
 
 
+async def git_add_specific(repo_path: Path, files: list[str]) -> None:
+    """Stage only the given files.
+
+    Args:
+        repo_path: Path to the repository
+        files: List of file paths to stage; an empty list is a no-op
+    """
+    if not files:
+        return
+    # One invocation for all paths; "--" keeps paths from being read as options
+    await _run_git_command("add", "--", *files, cwd=repo_path)
+
+
 async def git_commit(repo_path: Path, message: str) -> CommitResult:
     """Create a commit with the given message."""
     try:
diff --git a/src/auto_commit_service/llm/client.py b/src/auto_commit_service/llm/client.py
index 6c7fb4f..3cc7f4d 100644
--- a/src/auto_commit_service/llm/client.py
+++ b/src/auto_commit_service/llm/client.py
@@ -34,11 +34,13 @@ class LlamaCommitClient:
         timeout: float = 30.0,
         max_tokens: int = 100,
         temperature: float = 0.2,
+        default_model: str = "qwen2.5-1.5b-instruct",
     ):
         self.base_url = base_url.rstrip("/")
         self.timeout = timeout
         self.max_tokens = max_tokens
         self.temperature = temperature
+        self.default_model = default_model
         self._client: httpx.AsyncClient | None = None
 
     async def _get_client(self) -> httpx.AsyncClient:
@@ -117,11 +119,52 @@ class LlamaCommitClient:
         prompt = build_simple_prompt(diff, repo_name)
         return await self._generate(prompt)
 
-    async def _generate(self, prompt: str) -> str:
+    async def group_files(
+        self,
+        files: list[str],
+        diff_summary: str,
+        repo_name: str = "",
+        branch: str = "main",
+        model: str | None = None,
+    ) -> str:
+        """Ask the LLM to group changed files into logical commits.
+
+        Args:
+            files: List of changed file paths
+            diff_summary: Summary of the diff
+            repo_name: Repository name
+            branch: Current branch
+            model: Model to use (defaults to self.default_model)
+
+        Returns:
+            Raw model output, expected to be a JSON array of file groups
+        """
+        from .prompts import GROUPING_SYSTEM_PROMPT, build_grouping_prompt
+
+        prompt = build_grouping_prompt(files, diff_summary, repo_name, branch)
+        # Grouping needs its own system prompt (JSON output, not a commit
+        # message) and a larger token budget than a one-line commit message
+        return await self._generate(
+            prompt,
+            model=model,
+            max_tokens=2000,
+            system_prompt=GROUPING_SYSTEM_PROMPT,
+        )
+
+    async def _generate(
+        self,
+        prompt: str,
+        model: str | None = None,
+        max_tokens: int | None = None,
+        system_prompt: str | None = None,
+    ) -> str:
         """Internal method to call llama-service.
 
         Args:
             prompt: The user prompt
+            model: Model to use (defaults to self.default_model)
+            max_tokens: Max tokens for generation (defaults to self.max_tokens)
+            system_prompt: System prompt (defaults to COMMIT_SYSTEM_PROMPT)
 
         Returns:
             Generated text
@@ -135,18 +178,25 @@ class LlamaCommitClient:
 
         # Build messages array with system message (OpenAI format)
         messages = [
-            {"role": "system", "content": COMMIT_SYSTEM_PROMPT},
+            {"role": "system", "content": system_prompt or COMMIT_SYSTEM_PROMPT},
             {"role": "user", "content": prompt},
         ]
 
+        request_body = {
+            "messages": messages,
+            "max_tokens": max_tokens or self.max_tokens,
+            "temperature": self.temperature,
+            "stream": False,
+        }
+
+        # Fall back to the client's default model when the caller gives none
+        resolved_model = model or self.default_model
+        if resolved_model:
+            request_body["model"] = resolved_model
+
         response = await client.post(
             f"{self.base_url}/chat",
-            json={
-                "messages": messages,
-                "max_tokens": self.max_tokens,
-                "temperature": self.temperature,
-                "stream": False,
-            },
+            json=request_body,
         )
 
         if response.status_code == 503:
diff --git a/src/auto_commit_service/llm/prompts.py b/src/auto_commit_service/llm/prompts.py
index 045cc4f..5334125 100644
--- a/src/auto_commit_service/llm/prompts.py
+++ b/src/auto_commit_service/llm/prompts.py
@@ -114,3 +114,71 @@ Use emoji prefix (✨ feat, 🔧 chore, ♻️ refactor, 🐛 fix, 📝 docs,
 Keep under 50 chars. Imperative mood. No period.
 
 Output ONLY the commit message, nothing else."""
+
+
+GROUPING_SYSTEM_PROMPT = """You are an expert software engineer analyzing code changes to group files into logical commits.
+
+Your goal is to create focused, atomic commits that group related changes together while separating unrelated concerns.
+
+Follow these principles:
+- Group files that implement a single feature or fix together
+- Keep tests with their implementation
+- Separate configuration from feature code
+- Separate documentation updates
+- Aim for 3-7 groups (not too granular, not too coarse)
+- Each group should tell a coherent story
+
+Output ONLY valid JSON, nothing else."""
+
+
+def build_grouping_prompt(
+    files: list[str],
+    diff_summary: str,
+    repo_name: str = "",
+    branch: str = "main",
+) -> str:
+    """Build the user prompt for LLM file grouping analysis.
+
+    Args:
+        files: List of changed file paths
+        diff_summary: Summary of the diff (only the first 1500 characters are sent)
+        repo_name: Repository name ('unknown' is shown when empty)
+        branch: Current branch
+
+    Returns:
+        Prompt for file grouping
+    """
+    file_list = "\n".join(f"- {f}" for f in files)
+
+    return f"""Analyze these uncommitted changes and group the files into logical commits.
+
+Repository: {repo_name or 'unknown'}
+Branch: {branch}
+
+Changed files ({len(files)} total):
+{file_list}
+
+Diff summary:
+{diff_summary[:1500]}
+
+Rules:
+- Group related changes together (e.g., feature implementation + tests)
+- Separate unrelated concerns (config vs. feature code)
+- Keep commits focused and atomic
+- Aim for 3-7 groups (not too granular, not too coarse)
+- Each group should have a clear purpose
+- Assign every listed file to exactly one group, using the exact paths shown above
+
+Output JSON format:
+[
+  {{
+    "files": ["path/to/file1.ts", "path/to/file2.test.ts"],
+    "reasoning": "Feature: Add user authentication"
+  }},
+  {{
+    "files": ["package.json", "package-lock.json"],
+    "reasoning": "Dependencies: Update packages"
+  }}
+]
+
+Output ONLY the JSON array, nothing else."""