fix(@ml/auto-commit-pipeline): 🐛 Prevent CoT reasoning leak into commit messages

CRITICAL FIX: CoT reasoning text was leaking into commit messages.

Changes:
- Enhanced _extract_commit_message() to detect and filter reasoning patterns
- Added two-pass extraction: clean commits first, then fallback with cleaning
- Filters patterns like 'Let's analyze', 'step by step', 'First,', etc.
- Strips reasoning text from commit descriptions when found

Regression tests:
- test_extract_commit_message_with_reasoning: Various reasoning positions
- test_extract_commit_message_regression_cot_leak: Real-world failure case
- test_extract_commit_message_edge_cases: Edge cases and multi-line responses

Before: 'chore(shared): 🔧 Let's analyze this step by step:'
After:  'chore(shared): 🔧 Update shared configuration'

Bumped version: 0.1.0 -> 0.1.1

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Lilith 2026-01-13 10:53:25 -08:00
parent 7322a59be9
commit 1bfd737b70
3 changed files with 160 additions and 3 deletions

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "lilith-auto-commit-pipeline"
version = "0.1.0"
version = "0.1.1"
description = "Pipeline-based auto-commit service with RAG and CoT capabilities"
readme = "README.md"
requires-python = ">=3.11"

View file

@ -305,6 +305,8 @@ Think through each step, then provide ONLY the final commit message in the requi
Returns:
Just the commit message
"""
import re
# Look for "COMMIT MESSAGE:" marker
if "COMMIT MESSAGE:" in response:
lines = response.split("COMMIT MESSAGE:", 1)[1].strip().splitlines()
@ -314,9 +316,61 @@ Think through each step, then provide ONLY the final commit message in the requi
if line and not line.startswith("#"):
return line
# Fallback: return last line
# Pattern for conventional commit: type(scope): emoji description
# Valid types: feat, fix, chore, refactor, docs, test, perf, style, build, ci, revert
commit_pattern = re.compile(
r'^(feat|fix|chore|refactor|docs|test|perf|style|build|ci|revert)'
r'(\([^)]+\))?:\s*[\U0001F300-\U0001F9FF]?\s*.+',
re.IGNORECASE
)
# Reasoning patterns to exclude
reasoning_patterns = [
r'let\'?s\s+(analyze|think|consider|break|examine)',
r'step\s+by\s+step',
r'first,?\s+',
r'to\s+(understand|determine|identify)',
r'this\s+(change|commit|update)\s+(is|appears|seems)',
r'based\s+on\s+the',
]
lines = [l.strip() for l in response.splitlines() if l.strip()]
return lines[-1] if lines else "chore: 🔧 Update files"
# Find first line matching conventional commit format that's not reasoning
for line in lines:
# Check if it matches conventional commit format
if not commit_pattern.match(line):
continue
# Even if it matches format, skip if description contains reasoning
if any(re.search(pattern, line, re.IGNORECASE) for pattern in reasoning_patterns):
continue
return line
# Last fallback: first line that starts with a valid type (even if has reasoning)
for line in lines:
if commit_pattern.match(line):
# Try to extract just the part before reasoning starts
# Look for common reasoning starters after the emoji
cleaned = line
for pattern in reasoning_patterns:
match = re.search(pattern, line, re.IGNORECASE)
if match:
# Extract everything before the reasoning starts
cleaned = line[:match.start()].strip()
# Remove trailing punctuation
cleaned = re.sub(r'[:\s]+$', '', cleaned)
break
# If we have something valid after cleaning, use it
if commit_pattern.match(cleaned):
return cleaned
# Otherwise still return the line (better than fallback)
return line
# Ultimate fallback
return "chore: 🔧 Update files"
def _infer_category(self, message: str) -> str | None:
"""Infer commit category from message.

View file

@ -55,3 +55,106 @@ async def test_reason_with_cot_disabled(
assert result.status == StageStatus.SUCCESS
assert len(context_with_changes.commit_messages) == 1
def test_extract_commit_message_with_reasoning():
"""Test extraction of commit message when CoT reasoning is present."""
from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage
stage = ReasonCommitMessageStage()
# Test case 1: Reasoning BEFORE commit message
response1 = """Let's analyze this step by step:
1. The changes add a new feature
2. It's in the authentication module
feat(auth): Add OAuth2 login support"""
result1 = stage._extract_commit_message(response1)
assert result1 == "feat(auth): ✨ Add OAuth2 login support"
assert "Let's analyze" not in result1
assert "step by step" not in result1
# Test case 2: Reasoning AFTER commit message
response2 = """feat(api): 🐛 Fix timeout in user endpoint
This change addresses the timeout issue by increasing the default timeout."""
result2 = stage._extract_commit_message(response2)
assert result2 == "feat(api): 🐛 Fix timeout in user endpoint"
# Test case 3: Mixed reasoning
response3 = """First, let's consider the scope.
chore(deps): 🔧 Update Python dependencies
Based on the changes, this is a dependency update."""
result3 = stage._extract_commit_message(response3)
assert result3 == "chore(deps): 🔧 Update Python dependencies"
assert "First" not in result3
assert "Based on" not in result3
# Test case 4: COMMIT MESSAGE marker
response4 = """Analyzing the changes...
COMMIT MESSAGE:
fix(core): 🐛 Resolve memory leak in cache
Additional context..."""
result4 = stage._extract_commit_message(response4)
assert result4 == "fix(core): 🐛 Resolve memory leak in cache"
# Test case 5: No valid commit (fallback)
response5 = """This is just reasoning text without a proper commit message."""
result5 = stage._extract_commit_message(response5)
assert result5 == "chore: 🔧 Update files" # Fallback
def test_extract_commit_message_regression_cot_leak():
"""Regression test: Ensure CoT reasoning never leaks into commit message."""
from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage
stage = ReasonCommitMessageStage()
# Real-world case from logs: "chore(shared): 🔧 Let's analyze this step by step:"
bad_response = """chore(shared): 🔧 Let's analyze this step by step:
1. Files changed in shared directory
2. Changes affect configuration"""
result = stage._extract_commit_message(bad_response)
# Should NOT contain reasoning text
assert "Let's analyze" not in result
assert "step by step" not in result
assert ":" not in result or result.count(":") == 1 # Only one colon from type(scope):
# Should be a valid conventional commit
assert result.startswith(("feat", "fix", "chore", "refactor", "docs", "test", "perf", "style"))
def test_extract_commit_message_edge_cases():
"""Test edge cases for commit message extraction."""
from lilith_auto_commit_pipeline.stages.reason import ReasonCommitMessageStage
stage = ReasonCommitMessageStage()
# Empty response
assert stage._extract_commit_message("") == "chore: 🔧 Update files"
# Only whitespace
assert stage._extract_commit_message(" \n \n ") == "chore: 🔧 Update files"
# Multiple valid commits (should return first)
multi = """feat(auth): ✨ Add login
fix(api): 🐛 Fix bug"""
assert stage._extract_commit_message(multi).startswith("feat(auth)")
# Commit without emoji
no_emoji = """refactor(core): Simplify authentication logic"""
result = stage._extract_commit_message(no_emoji)
assert result == "refactor(core): Simplify authentication logic"
# Commit with scope but no emoji
assert "feat(api):" in stage._extract_commit_message("feat(api): Add endpoint")