diff --git a/auto-commit-pipeline-py/pyproject.toml b/auto-commit-pipeline-py/pyproject.toml index 1e866e4..99afa45 100644 --- a/auto-commit-pipeline-py/pyproject.toml +++ b/auto-commit-pipeline-py/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "lilith-auto-commit-pipeline" -version = "0.1.0" +version = "0.1.1" description = "Pipeline-based auto-commit service with RAG and CoT capabilities" readme = "README.md" requires-python = ">=3.11" diff --git a/auto-commit-pipeline-py/src/lilith_auto_commit_pipeline/stages/reason.py b/auto-commit-pipeline-py/src/lilith_auto_commit_pipeline/stages/reason.py index fbb512e..489dcf3 100644 --- a/auto-commit-pipeline-py/src/lilith_auto_commit_pipeline/stages/reason.py +++ b/auto-commit-pipeline-py/src/lilith_auto_commit_pipeline/stages/reason.py @@ -305,6 +305,8 @@ Think through each step, then provide ONLY the final commit message in the requi Returns: Just the commit message """ + import re + # Look for "COMMIT MESSAGE:" marker if "COMMIT MESSAGE:" in response: lines = response.split("COMMIT MESSAGE:", 1)[1].strip().splitlines() @@ -314,9 +316,61 @@ Think through each step, then provide ONLY the final commit message in the requi if line and not line.startswith("#"): return line - # Fallback: return last line + # Pattern for conventional commit: type(scope): emoji description + # Valid types: feat, fix, chore, refactor, docs, test, perf, style, build, ci, revert + commit_pattern = re.compile( + r'^(feat|fix|chore|refactor|docs|test|perf|style|build|ci|revert)' + r'(\([^)]+\))?:\s*[\U0001F300-\U0001F9FF]?\s*.+', + re.IGNORECASE + ) + + # Reasoning patterns to exclude + reasoning_patterns = [ + r'let\'?s\s+(analyze|think|consider|break|examine)', + r'step\s+by\s+step', + r'first,?\s+', + r'to\s+(understand|determine|identify)', + r'this\s+(change|commit|update)\s+(is|appears|seems)', + r'based\s+on\s+the', + ] + lines = [l.strip() for l in response.splitlines() if l.strip()] - return 
lines[-1] if lines else "chore: 🔧 Update files" + + # Find first line matching conventional commit format that's not reasoning + for line in lines: + # Check if it matches conventional commit format + if not commit_pattern.match(line): + continue + + # Even if it matches format, skip if description contains reasoning + if any(re.search(pattern, line, re.IGNORECASE) for pattern in reasoning_patterns): + continue + + return line + + # Last fallback: first line that starts with a valid type (even if has reasoning) + for line in lines: + if commit_pattern.match(line): + # Try to extract just the part before reasoning starts + # Look for common reasoning starters after the emoji + cleaned = line + for pattern in reasoning_patterns: + match = re.search(pattern, line, re.IGNORECASE) + if match: + # Extract everything before the reasoning starts + cleaned = line[:match.start()].strip() + # Remove trailing punctuation + cleaned = re.sub(r'[:\s]+$', '', cleaned) + break + + # If we have something valid after cleaning, use it + if commit_pattern.match(cleaned): + return cleaned + # Otherwise still return the line (better than fallback) + return line + + # Ultimate fallback + return "chore: 🔧 Update files" def _infer_category(self, message: str) -> str | None: """Infer commit category from message. 
diff --git a/auto-commit-pipeline-py/tests/test_stages_reason.py b/auto-commit-pipeline-py/tests/test_stages_reason.py
index 4886de5..d553cbb 100644
--- a/auto-commit-pipeline-py/tests/test_stages_reason.py
+++ b/auto-commit-pipeline-py/tests/test_stages_reason.py
@@ -55,3 +55,125 @@ async def test_reason_with_cot_disabled(
 
     assert result.status == StageStatus.SUCCESS
     assert len(context_with_changes.commit_messages) == 1
+
+
+def test_extract_commit_message_with_reasoning():
+    """Test extraction of commit message when CoT reasoning is present."""
+    from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage
+
+    stage = ReasonCommitMessageStage()
+
+    # Test case 1: Reasoning BEFORE commit message
+    response1 = """Let's analyze this step by step:
+1. The changes add a new feature
+2. It's in the authentication module
+
+feat(auth): ✨ Add OAuth2 login support"""
+
+    result1 = stage._extract_commit_message(response1)
+    assert result1 == "feat(auth): ✨ Add OAuth2 login support"
+    assert "Let's analyze" not in result1
+    assert "step by step" not in result1
+
+    # Test case 2: Reasoning AFTER commit message
+    response2 = """feat(api): 🐛 Fix timeout in user endpoint
+
+This change addresses the timeout issue by increasing the default timeout."""
+
+    result2 = stage._extract_commit_message(response2)
+    assert result2 == "feat(api): 🐛 Fix timeout in user endpoint"
+
+    # Test case 3: Mixed reasoning
+    response3 = """First, let's consider the scope.
+chore(deps): 🔧 Update Python dependencies
+Based on the changes, this is a dependency update."""
+
+    result3 = stage._extract_commit_message(response3)
+    assert result3 == "chore(deps): 🔧 Update Python dependencies"
+    assert "First" not in result3
+    assert "Based on" not in result3
+
+    # Test case 4: COMMIT MESSAGE marker
+    response4 = """Analyzing the changes...
+
+COMMIT MESSAGE:
+fix(core): 🐛 Resolve memory leak in cache
+
+Additional context..."""
+
+    result4 = stage._extract_commit_message(response4)
+    assert result4 == "fix(core): 🐛 Resolve memory leak in cache"
+
+    # Test case 5: No valid commit (fallback)
+    response5 = """This is just reasoning text without a proper commit message."""
+
+    result5 = stage._extract_commit_message(response5)
+    assert result5 == "chore: 🔧 Update files"  # Fallback
+
+
+def test_extract_commit_message_regression_cot_leak():
+    """Regression test: Ensure CoT reasoning never leaks into commit message."""
+    from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage
+
+    stage = ReasonCommitMessageStage()
+
+    # Real-world case from logs: "chore(shared): 🔧 Let's analyze this step by step:"
+    bad_response = """chore(shared): 🔧 Let's analyze this step by step:
+
+1. Files changed in shared directory
+2. Changes affect configuration"""
+
+    result = stage._extract_commit_message(bad_response)
+
+    # Should NOT contain reasoning text
+    assert "Let's analyze" not in result
+    assert "step by step" not in result
+
+    # A commit header is one line with exactly one colon (the one closing
+    # type(scope)); the reasoning sentence's trailing colon must be gone.
+    assert "\n" not in result
+    assert result.count(":") == 1
+
+    # Should be a valid conventional commit; list every type the extractor
+    # accepts so a legitimate build/ci/revert result is not rejected here.
+    valid_types = (
+        "feat",
+        "fix",
+        "chore",
+        "refactor",
+        "docs",
+        "test",
+        "perf",
+        "style",
+        "build",
+        "ci",
+        "revert",
+    )
+    assert result.startswith(valid_types)
+
+
+def test_extract_commit_message_edge_cases():
+    """Test edge cases for commit message extraction."""
+    from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage
+
+    stage = ReasonCommitMessageStage()
+
+    # Empty response
+    assert stage._extract_commit_message("") == "chore: 🔧 Update files"
+
+    # Only whitespace
+    assert stage._extract_commit_message(" \n \n ") == "chore: 🔧 Update files"
+
+    # Multiple valid commits (should return first)
+    multi = """feat(auth): ✨ Add login
+fix(api): 🐛 Fix bug"""
+    assert stage._extract_commit_message(multi) == "feat(auth): ✨ Add login"
+
+    # Commit without emoji
+    no_emoji = """refactor(core): Simplify authentication logic"""
+    result = stage._extract_commit_message(no_emoji)
+    assert result == "refactor(core): Simplify authentication logic"
+
+    # Commit with scope but no emoji
+    scoped = "feat(api): Add endpoint"
+    assert stage._extract_commit_message(scoped) == scoped