fix(@ml/auto-commit-pipeline): 🐛 Prevent CoT reasoning leak into commit messages
CRITICAL FIX: CoT reasoning text was leaking into commit messages.

Changes:
- Enhanced _extract_commit_message() to detect and filter reasoning patterns
- Added two-pass extraction: clean commits first, then fallback with cleaning
- Filters patterns like 'Let's analyze', 'step by step', 'First,', etc.
- Strips reasoning text from commit descriptions when found

Regression tests:
- test_extract_commit_message_with_reasoning: Various reasoning positions
- test_extract_commit_message_regression_cot_leak: Real-world failure case
- test_extract_commit_message_edge_cases: Edge cases and multi-line responses

Before: 'chore(shared): 🔧 Let's analyze this step by step:'
After: 'chore(shared): 🔧 Update shared configuration'

Bumped version: 0.1.0 -> 0.1.1

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
7322a59be9
commit
1bfd737b70
3 changed files with 160 additions and 3 deletions
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "lilith-auto-commit-pipeline"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
description = "Pipeline-based auto-commit service with RAG and CoT capabilities"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
|
|
|||
|
|
@ -305,6 +305,8 @@ Think through each step, then provide ONLY the final commit message in the requi
|
|||
Returns:
|
||||
Just the commit message
|
||||
"""
|
||||
import re
|
||||
|
||||
# Look for "COMMIT MESSAGE:" marker
|
||||
if "COMMIT MESSAGE:" in response:
|
||||
lines = response.split("COMMIT MESSAGE:", 1)[1].strip().splitlines()
|
||||
|
|
@ -314,9 +316,61 @@ Think through each step, then provide ONLY the final commit message in the requi
|
|||
if line and not line.startswith("#"):
|
||||
return line
|
||||
|
||||
# Fallback: return last line
|
||||
# Pattern for conventional commit: type(scope): emoji description
|
||||
# Valid types: feat, fix, chore, refactor, docs, test, perf, style, build, ci, revert
|
||||
commit_pattern = re.compile(
|
||||
r'^(feat|fix|chore|refactor|docs|test|perf|style|build|ci|revert)'
|
||||
r'(\([^)]+\))?:\s*[\U0001F300-\U0001F9FF]?\s*.+',
|
||||
re.IGNORECASE
|
||||
)
|
||||
|
||||
# Reasoning patterns to exclude
|
||||
reasoning_patterns = [
|
||||
r'let\'?s\s+(analyze|think|consider|break|examine)',
|
||||
r'step\s+by\s+step',
|
||||
r'first,?\s+',
|
||||
r'to\s+(understand|determine|identify)',
|
||||
r'this\s+(change|commit|update)\s+(is|appears|seems)',
|
||||
r'based\s+on\s+the',
|
||||
]
|
||||
|
||||
lines = [l.strip() for l in response.splitlines() if l.strip()]
|
||||
return lines[-1] if lines else "chore: 🔧 Update files"
|
||||
|
||||
# Find first line matching conventional commit format that's not reasoning
|
||||
for line in lines:
|
||||
# Check if it matches conventional commit format
|
||||
if not commit_pattern.match(line):
|
||||
continue
|
||||
|
||||
# Even if it matches format, skip if description contains reasoning
|
||||
if any(re.search(pattern, line, re.IGNORECASE) for pattern in reasoning_patterns):
|
||||
continue
|
||||
|
||||
return line
|
||||
|
||||
# Last fallback: first line that starts with a valid type (even if has reasoning)
|
||||
for line in lines:
|
||||
if commit_pattern.match(line):
|
||||
# Try to extract just the part before reasoning starts
|
||||
# Look for common reasoning starters after the emoji
|
||||
cleaned = line
|
||||
for pattern in reasoning_patterns:
|
||||
match = re.search(pattern, line, re.IGNORECASE)
|
||||
if match:
|
||||
# Extract everything before the reasoning starts
|
||||
cleaned = line[:match.start()].strip()
|
||||
# Remove trailing punctuation
|
||||
cleaned = re.sub(r'[:\s]+$', '', cleaned)
|
||||
break
|
||||
|
||||
# If we have something valid after cleaning, use it
|
||||
if commit_pattern.match(cleaned):
|
||||
return cleaned
|
||||
# Otherwise still return the line (better than fallback)
|
||||
return line
|
||||
|
||||
# Ultimate fallback
|
||||
return "chore: 🔧 Update files"
|
||||
|
||||
def _infer_category(self, message: str) -> str | None:
|
||||
"""Infer commit category from message.
|
||||
|
|
|
|||
|
|
@ -55,3 +55,106 @@ async def test_reason_with_cot_disabled(
|
|||
|
||||
assert result.status == StageStatus.SUCCESS
|
||||
assert len(context_with_changes.commit_messages) == 1
|
||||
|
||||
|
||||
def test_extract_commit_message_with_reasoning():
    """Verify the commit line is returned wherever reasoning text sits in the response."""
    from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage

    extract = ReasonCommitMessageStage()._extract_commit_message

    # Case 1: the reasoning comes first, the commit line last.
    reasoning_first = "\n".join(
        [
            "Let's analyze this step by step:",
            "1. The changes add a new feature",
            "2. It's in the authentication module",
            "",
            "feat(auth): ✨ Add OAuth2 login support",
        ]
    )
    extracted = extract(reasoning_first)
    assert extracted == "feat(auth): ✨ Add OAuth2 login support"
    for leaked in ("Let's analyze", "step by step"):
        assert leaked not in extracted

    # Case 2: the commit line comes first, an explanation follows.
    reasoning_last = (
        "feat(api): 🐛 Fix timeout in user endpoint\n"
        "\n"
        "This change addresses the timeout issue by increasing the default timeout."
    )
    assert extract(reasoning_last) == "feat(api): 🐛 Fix timeout in user endpoint"

    # Case 3: the commit line is sandwiched between reasoning sentences.
    reasoning_both_sides = (
        "First, let's consider the scope.\n"
        "chore(deps): 🔧 Update Python dependencies\n"
        "Based on the changes, this is a dependency update."
    )
    extracted = extract(reasoning_both_sides)
    assert extracted == "chore(deps): 🔧 Update Python dependencies"
    for leaked in ("First", "Based on"):
        assert leaked not in extracted

    # Case 4: an explicit "COMMIT MESSAGE:" marker introduces the commit line.
    with_marker = "\n".join(
        [
            "Analyzing the changes...",
            "",
            "COMMIT MESSAGE:",
            "fix(core): 🐛 Resolve memory leak in cache",
            "",
            "Additional context...",
        ]
    )
    assert extract(with_marker) == "fix(core): 🐛 Resolve memory leak in cache"

    # Case 5: nothing resembles a commit line, so the generic fallback is expected.
    no_commit_line = "This is just reasoning text without a proper commit message."
    assert extract(no_commit_line) == "chore: 🔧 Update files"
|
||||
|
||||
|
||||
def test_extract_commit_message_regression_cot_leak():
    """Regression guard: reasoning text must not survive in the extracted message."""
    from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage

    # Response seen in the logs: the "commit line" itself is the start of the reasoning.
    leaked_response = (
        "chore(shared): 🔧 Let's analyze this step by step:\n"
        "\n"
        "1. Files changed in shared directory\n"
        "2. Changes affect configuration"
    )

    message = ReasonCommitMessageStage()._extract_commit_message(leaked_response)

    # No fragment of the reasoning may remain.
    for fragment in ("Let's analyze", "step by step"):
        assert fragment not in message
    # At most the single colon that follows type(scope).
    assert message.count(":") <= 1

    # The result still has to open with a conventional-commit type.
    allowed_types = ("feat", "fix", "chore", "refactor", "docs", "test", "perf", "style")
    assert message.startswith(allowed_types)
|
||||
|
||||
|
||||
def test_extract_commit_message_edge_cases():
    """Exercise boundary inputs: blank responses, several commit lines, missing emoji."""
    from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage

    extract = ReasonCommitMessageStage()._extract_commit_message
    fallback = "chore: 🔧 Update files"

    # An empty response and a whitespace-only response both yield the fallback.
    for blank in ("", " \n \n "):
        assert extract(blank) == fallback

    # With several valid commit lines, the first one is chosen.
    two_commits = "feat(auth): ✨ Add login\nfix(api): 🐛 Fix bug"
    assert extract(two_commits).startswith("feat(auth)")

    # A commit line without an emoji is returned unchanged.
    plain_commit = "refactor(core): Simplify authentication logic"
    assert extract(plain_commit) == plain_commit

    # Scope present, emoji absent: the type(scope) prefix is kept.
    assert "feat(api):" in extract("feat(api): Add endpoint")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue