auto-commit-service/tests/test_format_utils.py

483 lines
19 KiB
Python
Raw Permalink Normal View History

"""Tests for commit message formatting utilities.
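Covers sanitize_scope (emoji stripping, normalization), correct_emoji
(enforcing canonical gitmoji from the GITMOJI_MAP table), sanitize_message,
sanitize_message_scope, auto_correct_format, extract_commit_message, the
commit message validator, and the format stage's retry on validation failure.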
"""
import pytest
from auto_commit_service.pipeline.format_utils import (
auto_correct_format,
correct_emoji,
extract_commit_message,
sanitize_message,
sanitize_message_scope,
sanitize_scope,
)
from auto_commit_service.pipeline.gitmoji import GITMOJI_MAP
from auto_commit_service.llm.validator import validate_commit_message
class TestSanitizeScope:
    """Scope values must be plain ASCII identifiers — no emoji, no parens."""

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("auth", "auth"),
            ("api-routes", "api-routes"),
            ("pipeline.stages", "pipeline.stages"),
            ("core_module", "core_module"),
        ],
    )
    def test_passthrough_clean_scopes(self, raw: str, expected: str) -> None:
        """Already-clean scopes are returned unchanged."""
        assert sanitize_scope(raw) == expected

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("✨ auth", "auth"),
            ("🔧config", "config"),
            ("🐛 api-routes", "api-routes"),
            ("✨🎉 new-feature", "feature"),  # "new" is a stop word
            ("⚡ perf", "perf"),
        ],
    )
    def test_strips_emoji(self, raw: str, expected: str) -> None:
        """Leading emoji are removed, with or without a separating space."""
        assert sanitize_scope(raw) == expected

    def test_strips_parentheses(self) -> None:
        """Parenthesised natural language is reduced to meaningful tokens."""
        # "with" is a stop word, so only meaningful tokens remain
        result = sanitize_scope("core(with react integration)")
        assert "with" not in result.split("-")
        assert result  # Should still have some content

    def test_emoji_only_returns_empty(self) -> None:
        """A scope consisting solely of emoji sanitizes to the empty string."""
        # The input must actually contain emoji; the empty-string case is
        # covered separately by test_empty_input.
        assert sanitize_scope("✨🎉") == ""

    def test_empty_input(self) -> None:
        """Empty input yields empty output."""
        assert sanitize_scope("") == ""

    def test_truncates_long_scope(self) -> None:
        """Without an explicit limit the result is at most 25 characters."""
        result = sanitize_scope("this is a really long scope value that exceeds the limit")
        assert len(result) <= 25

    def test_custom_max_length(self) -> None:
        """The max_length keyword caps the length of the result."""
        result = sanitize_scope("authentication", max_length=10)
        assert len(result) <= 10

    def test_collapses_whitespace_to_hyphens(self) -> None:
        """Whitespace between tokens becomes a single hyphen."""
        assert sanitize_scope("api routes") == "api-routes"

    # --- New tests for stop word filtering ---

    def test_strips_stop_words_from_natural_language(self) -> None:
        """Stop words are dropped from space-separated phrases."""
        assert sanitize_scope("the primary auth module") == "auth"
        assert sanitize_scope("for the config settings") == "config-settings"

    def test_strips_stop_words_from_hyphenated(self) -> None:
        """Stop words are dropped from hyphen-separated scopes as well."""
        assert sanitize_scope("theme-primary-as-its-the") == "theme"
        assert sanitize_scope("ui-tiers-the-package-name-as") == "ui-tiers"
        assert sanitize_scope("config-settings-for-the-main") == "config-settings"

    def test_empty_after_all_stop_words(self) -> None:
        """A scope made only of stop words sanitizes to the empty string."""
        assert sanitize_scope("the a an is") == ""

    def test_rejects_module_file_package_words(self) -> None:
        """Filler nouns such as "module" and "file" are removed."""
        assert sanitize_scope("auth module") == "auth"
        assert sanitize_scope("config file") == "config"
class TestCorrectEmoji:
    """correct_emoji enforces the canonical gitmoji from GITMOJI_MAP."""

    # (message carrying the wrong emoji, message with the canonical emoji)
    _WRONG_TO_CANONICAL = [
        ("feat(auth): 🔧 Add login", "feat(auth): ✨ Add login"),
        ("fix(api): ✨ Resolve timeout", "fix(api): 🐛 Resolve timeout"),
        ("chore(config): 🐛 Update rules", "chore(config): 🔧 Update rules"),
        ("perf(db): 🔧 Optimize queries", "perf(db): ⚡ Optimize queries"),
        ("test(auth): 🔧 Add unit tests", "test(auth): ✅ Add unit tests"),
        ("refactor(core): 🔧 Extract utils", "refactor(core): ♻️ Extract utils"),
        ("docs(readme): 🔧 Add setup guide", "docs(readme): 📝 Add setup guide"),
    ]

    # Messages whose emoji already matches their commit type.
    _ALREADY_CANONICAL = [
        "feat(auth): ✨ Add login",
        "fix(api): 🐛 Resolve timeout",
        "chore(config): 🔧 Update ESLint rules",
        "refactor(core): ♻️ Extract validation logic",
        "docs(readme): 📝 Add installation guide",
        "test(auth): ✅ Add integration tests",
        "perf(query): ⚡ Optimize database lookup",
    ]

    @pytest.mark.parametrize(("message", "expected"), _WRONG_TO_CANONICAL)
    def test_corrects_wrong_emoji(self, message: str, expected: str) -> None:
        """A mismatched emoji is swapped for the one belonging to the type."""
        assert correct_emoji(message) == expected

    @pytest.mark.parametrize("message", _ALREADY_CANONICAL)
    def test_preserves_correct_emoji(self, message: str) -> None:
        """A message that already has the right emoji is left untouched."""
        assert correct_emoji(message) == message

    def test_handles_no_scope(self) -> None:
        """Scope-less messages are handled too."""
        scopeless = "docs: 📝 Update README"
        assert correct_emoji(scopeless) == "docs: 📝 Update README"

    def test_passthrough_non_commit_format(self) -> None:
        """Text that is not a commit header comes back as-is."""
        assert correct_emoji("random text here") == "random text here"
        assert correct_emoji("") == ""

    @pytest.mark.parametrize("commit_type", list(GITMOJI_MAP.keys()))
    def test_all_gitmoji_types_have_correction(self, commit_type: str) -> None:
        """Every type in GITMOJI_MAP should produce the correct emoji."""
        canonical = GITMOJI_MAP[commit_type]
        placeholder = "🤖"
        outcome = correct_emoji(
            f"{commit_type}(test): {placeholder} Do something useful here"
        )
        wanted = f"{commit_type}(test): {canonical} Do something useful here"
        assert outcome == wanted, (
            f"Type '{commit_type}' should produce {canonical}, got: {outcome}"
        )
class TestValidatorAcceptsAllGitmojiTypes:
    """The validator must accept every commit type from the gitmoji table."""

    @pytest.mark.parametrize("commit_type", list(GITMOJI_MAP.keys()))
    def test_validator_recognizes_type(self, commit_type: str) -> None:
        """A message built from the type and its own emoji passes validation."""
        # validate_commit_message is provided by the module-level import, so
        # no function-local re-import is needed here.
        emoji = GITMOJI_MAP[commit_type]
        message = f"{commit_type}(core): {emoji} add meaningful feature implementation"
        result = validate_commit_message(message)
        assert result.valid, (
            f"Type '{commit_type}' should be valid, violations: {result.violations}"
        )
class TestFormatStageValidatorRetry:
    """FormatCommitMessageStage retries LLM once when validator rejects the message."""

    # Dotted path patched so the stage receives the mocked LLM client.
    # NOTE(review): the module segment reads "init" here — confirm it matches
    # the module the stage actually resolves get_llm_client from.
    _LLM_CLIENT_TARGET = "auto_commit_service.pipeline.init.get_llm_client"

    @pytest.fixture
    def analysis(self):
        """Build the CommitAnalysis handed to the format stage in each test."""
        from auto_commit_service.pipeline.models import CommitAnalysis

        return CommitAnalysis(
            files=["src/config.py"],
            change_type="chore",
            scope="config",
            reasoning="Updated config settings",
            impact_summary="Configuration cleanup",
            suggested_description="Refactor settings for clarity",
        )

    @pytest.fixture
    def format_stage(self):
        """Provide a fresh FormatCommitMessageStage instance."""
        from auto_commit_service.pipeline.stages.format import FormatCommitMessageStage

        return FormatCommitMessageStage()

    @classmethod
    def _mocked_llm(cls, **mock_kwargs):
        """Return a mock client plus the patcher that installs it.

        ``mock_kwargs`` are forwarded to the AsyncMock standing in for
        ``format_commit_message`` (``return_value`` or ``side_effect``).
        """
        from unittest.mock import AsyncMock, patch

        client = AsyncMock()
        client.format_commit_message = AsyncMock(**mock_kwargs)
        return client, patch(cls._LLM_CLIENT_TARGET, return_value=client)

    @pytest.mark.asyncio
    async def test_valid_first_attempt_no_retry(self, format_stage, analysis) -> None:
        """When the first LLM response passes validation, no retry occurs."""
        accepted = "chore(config): 🔧 refactor settings for service isolation"
        client, llm_patch = self._mocked_llm(return_value=accepted)
        with llm_patch:
            outcome = await format_stage._format_commit_message(analysis)
        assert outcome.message == "chore(config): 🔧 refactor settings for service isolation"
        client.format_commit_message.assert_called_once()

    @pytest.mark.asyncio
    async def test_retry_on_invalid_first_attempt(self, format_stage, analysis) -> None:
        """When the first LLM response fails validation, retry with feedback."""
        rejected = "chore(config): 🔧 Update 5 py files"
        accepted = "chore(config): 🔧 refactor settings for service isolation"
        client, llm_patch = self._mocked_llm(side_effect=[rejected, accepted])
        with llm_patch:
            outcome = await format_stage._format_commit_message(analysis)
        assert outcome.message == "chore(config): 🔧 refactor settings for service isolation"
        assert client.format_commit_message.call_count == 2

    @pytest.mark.asyncio
    async def test_uses_retry_when_both_fail(self, format_stage, analysis) -> None:
        """When both attempts fail validation, use the retry (usually better)."""
        first_reject = "chore(config): 🔧 Update 5 py files"
        second_reject = "chore(config): 🔧 Update configuration files"
        client, llm_patch = self._mocked_llm(side_effect=[first_reject, second_reject])
        with llm_patch:
            outcome = await format_stage._format_commit_message(analysis)
        # Should use the retry attempt even though it also failed
        assert outcome.message == "chore(config): 🔧 Update configuration files"
        assert client.format_commit_message.call_count == 2

    def test_retry_prompt_includes_violations(self, format_stage, analysis) -> None:
        """The retry prompt must include the violation feedback."""
        feedback = [
            "Contains banned phrase matching: ^update\\s+\\d+\\s+(files?|py\\s+files?)",
            "Missing action verb",
        ]
        prompt = format_stage._build_retry_prompt(
            analysis,
            "chore(config): 🔧 Update 5 py files",
            feedback,
        )
        # Rejection notice, the rejected text, each violation, and the
        # analysis context must all appear in the prompt.
        required_fragments = (
            "rejected by quality checks",
            "Update 5 py files",
            "banned phrase",
            "Missing action verb",
            analysis.impact_summary,
            analysis.suggested_description,
        )
        for fragment in required_fragments:
            assert fragment in prompt
class TestSanitizeMessageScope:
    """Tests for sanitize_message_scope — scope cleanup within formatted messages."""

    def test_cleans_reasoning_leak_in_scope(self) -> None:
        """Natural-language reasoning inside the scope is reduced to its key token."""
        cleaned = sanitize_message_scope("feat(the primary auth module): ✨ Add login")
        assert cleaned == "feat(auth): ✨ Add login"

    def test_cleans_hyphenated_reasoning_in_scope(self) -> None:
        """Hyphen-joined reasoning in the scope is cleaned the same way."""
        noisy = "deps-upgrade(theme-primary-as-its-the): ⬆️ Bump vite"
        assert sanitize_message_scope(noisy) == "deps-upgrade(theme): ⬆️ Bump vite"

    def test_preserves_clean_scope(self) -> None:
        """A message with a tidy scope is returned unchanged."""
        original = "feat(auth): ✨ Add OAuth2 login support"
        assert sanitize_message_scope(original) == original

    def test_preserves_clean_kebab_scope(self) -> None:
        """A kebab-case scope is already acceptable and is kept."""
        original = "refactor(pipeline-stages): ♻️ Extract shared logic"
        assert sanitize_message_scope(original) == original

    def test_removes_scope_if_empty_after_sanitization(self) -> None:
        """When nothing survives sanitization the scope is dropped entirely."""
        cleaned = sanitize_message_scope("chore(the a an is): 🔧 Update something")
        assert cleaned == "chore: 🔧 Update something"

    def test_no_scope_message_unchanged(self) -> None:
        """Scope-less messages pass straight through."""
        original = "chore: 🔧 Update something"
        assert sanitize_message_scope(original) == original

    def test_non_matching_message_unchanged(self) -> None:
        """Text that is not a commit header passes straight through."""
        original = "not a commit message"
        assert sanitize_message_scope(original) == original
class TestAutoCorrectFormatSeparators:
    """Tests for auto_correct_format — fixes slash and dash separators.

    The class name is deliberately distinct from ``TestAutoCorrectFormat``:
    this module defines a class of that name further down, and a second
    definition with the same name would rebind it and keep pytest from
    collecting these tests.
    """

    def test_fixes_slash_format(self) -> None:
        """``type/scope:`` is rewritten to ``type(scope):``."""
        result = auto_correct_format("feat/auth: ✨ Add login")
        assert result == "feat(auth): ✨ Add login"

    def test_fixes_dash_format(self) -> None:
        """``type-scope:`` is rewritten to ``type(scope):``."""
        result = auto_correct_format("fix-api: 🐛 Fix timeout")
        assert result == "fix(api): 🐛 Fix timeout"

    def test_preserves_correct_format(self) -> None:
        """A correctly formatted header is returned unchanged."""
        msg = "feat(auth): ✨ Add login"
        assert auto_correct_format(msg) == msg

    def test_preserves_non_commit_line(self) -> None:
        """Ordinary prose is returned unchanged."""
        msg = "This is just a regular line"
        assert auto_correct_format(msg) == msg
class TestExtractCommitMessage:
    """Tests for extract_commit_message — extracts from LLM response."""

    def test_extracts_from_single_line(self) -> None:
        """A response that is just the commit line is returned as-is."""
        only_line = "feat(auth): ✨ Add OAuth2 login support"
        assert extract_commit_message(only_line) == only_line

    def test_extracts_from_multiline_with_reasoning(self) -> None:
        """The commit line is picked out from surrounding commentary."""
        # NOTE(review): the commit line below carries no emoji while the
        # expected result does — presumably extraction applies the canonical
        # emoji; confirm against extract_commit_message.
        response = "\n".join(
            [
                "Let me analyze the changes.",
                "Based on the file changes, this is a feature addition.",
                "feat(auth): Add OAuth2 login support",
                "This adds the login endpoint.",
            ]
        )
        extracted = extract_commit_message(response)
        assert extracted == "feat(auth): ✨ Add OAuth2 login support"

    def test_skips_reasoning_lines(self) -> None:
        """A commit-shaped line that is really reasoning is passed over."""
        response = "\n".join(
            [
                "feat(auth): ✨ **Reasoning:** Based on analysis",
                "feat(auth): Add OAuth2 login support",
            ]
        )
        extracted = extract_commit_message(response)
        assert extracted == "feat(auth): ✨ Add OAuth2 login support"

    def test_auto_corrects_slash_in_response(self) -> None:
        """Slash-separated headers are corrected during extraction."""
        extracted = extract_commit_message("feat/auth: ✨ Add OAuth2 login support")
        assert extracted == "feat(auth): ✨ Add OAuth2 login support"

    def test_raises_on_no_valid_message(self) -> None:
        """A response without any commit line raises RuntimeError."""
        unusable = "Just some random text with no commit format"
        with pytest.raises(RuntimeError, match="Failed to extract"):
            extract_commit_message(unusable)
class TestSanitizeMessage:
    """Tests for sanitize_message — removes markdown artifacts."""

    def test_removes_bold_markers(self) -> None:
        """Markdown bold markers (``**``) are stripped."""
        assert sanitize_message("**feat**: add login") == "feat: add login"

    def test_removes_code_markers(self) -> None:
        """Markdown inline-code backticks are stripped."""
        assert sanitize_message("`feat`: add login") == "feat: add login"

    def test_collapses_multiple_spaces(self) -> None:
        """Runs of spaces are collapsed to a single space."""
        # The input must contain repeated spaces; a single-spaced input would
        # pass even if no collapsing happened at all.
        assert sanitize_message("feat:   add    login") == "feat: add login"
class TestValidatorScopeChecks:
    """Tests for validate_commit_message scope validation."""

    def test_rejects_generic_src_scope(self) -> None:
        """``src`` is rejected and the violation mentions "generic"."""
        verdict = validate_commit_message("feat(src): ✨ Add new feature")
        assert not verdict.valid
        assert any("generic" in reason.lower() for reason in verdict.violations)

    def test_rejects_generic_lib_scope(self) -> None:
        """``lib`` is rejected as a scope."""
        verdict = validate_commit_message("feat(lib): ✨ Add new feature")
        assert not verdict.valid

    def test_rejects_generic_app_scope(self) -> None:
        """``app`` is rejected as a scope."""
        verdict = validate_commit_message("feat(app): ✨ Add new feature")
        assert not verdict.valid

    def test_accepts_specific_scope(self) -> None:
        """A specific single-word scope is accepted."""
        verdict = validate_commit_message("feat(auth): ✨ Add OAuth2 login support")
        assert verdict.valid

    def test_accepts_kebab_scope(self) -> None:
        """A kebab-case scope is accepted."""
        candidate = "refactor(api-routes): ♻️ Extract shared validation logic"
        verdict = validate_commit_message(candidate)
        assert verdict.valid

    def test_rejects_scope_with_spaces(self) -> None:
        """A scope containing spaces is rejected with a "spaces" violation."""
        verdict = validate_commit_message("feat(auth module): ✨ Add login support")
        assert not verdict.valid
        assert any("spaces" in reason.lower() for reason in verdict.violations)

    def test_rejects_overly_long_scope(self) -> None:
        """A 30-character scope is rejected with a "long" violation."""
        oversized = "a" * 30
        verdict = validate_commit_message(f"feat({oversized}): ✨ Add new feature")
        assert not verdict.valid
        assert any("long" in reason.lower() for reason in verdict.violations)
class TestAutoCorrectFormat:
    """auto_correct_format applies _CORRECTIONS table entries in order."""

    # Well-formed headers that must come back untouched.
    _VALID_MESSAGES = [
        "feat(auth): ✨ Add OAuth2",
        "fix(api): 🐛 Resolve null pointer",
        "chore(config): 🔧 Update ESLint rules",
        "refactor(core): ♻️ Extract shared logic",
        "test(auth): ✅ Add integration tests",
    ]

    # --- bare deps normalization ---

    def test_bare_deps_no_scope(self) -> None:
        """A bare ``deps:`` prefix becomes ``deps-upgrade:``."""
        corrected = auto_correct_format("deps: 🔧 Rebuild debug dependencies")
        assert corrected.startswith("deps-upgrade:")

    def test_bare_deps_with_scope(self) -> None:
        """A scoped ``deps(...)`` prefix becomes ``deps-upgrade(...)``."""
        corrected = auto_correct_format("deps(simulator): 🔧 Rebuild things")
        assert corrected.startswith("deps-upgrade(simulator):")

    def test_bare_deps_actual_failing_log_message(self) -> None:
        """Reproduces the exact message from the activity log that was failing."""
        logged = (
            "deps: 🔧 Rebuild debug dependencies for simulator modules "
            "(physics, AI, city, climate, combat, core)"
        )
        corrected = auto_correct_format(logged)
        assert corrected.startswith("deps-upgrade:")
        assert "Rebuild debug dependencies" in corrected

    def test_deps_upgrade_passes_through_unchanged(self) -> None:
        """Valid deps-upgrade must not be mangled."""
        untouched = "deps-upgrade(api): ⬆️ bump httpx to 0.27"
        assert auto_correct_format(untouched) == untouched

    def test_deps_upgrade_with_scope_passes_through(self) -> None:
        """A scoped deps-upgrade header is returned unchanged."""
        untouched = "deps-upgrade(config): ⬆️ Update dependency versions"
        assert auto_correct_format(untouched) == untouched

    def test_deps_add_passes_through_unchanged(self) -> None:
        """A deps-add header is returned unchanged."""
        untouched = "deps-add(core): add aiohttp"
        assert auto_correct_format(untouched) == untouched

    # --- slash scope correction ---

    def test_slash_scope_corrected(self) -> None:
        """``type/scope:`` is rewritten to ``type(scope):``."""
        corrected = auto_correct_format("feat/auth: ✨ Add login")
        assert corrected == "feat(auth): ✨ Add login"

    def test_slash_scope_with_hyphen(self) -> None:
        """A hyphenated scope after a slash is kept intact."""
        corrected = auto_correct_format("fix/api-routes: 🐛 Fix timeout")
        assert corrected == "fix(api-routes): 🐛 Fix timeout"

    # --- dash scope correction ---

    def test_dash_scope_corrected(self) -> None:
        """``type-scope:`` is rewritten to ``type(scope):``."""
        corrected = auto_correct_format("chore-config: 🔧 Update rules")
        assert corrected == "chore(config): 🔧 Update rules"

    # --- passthrough for valid messages ---

    @pytest.mark.parametrize("msg", _VALID_MESSAGES)
    def test_valid_messages_unchanged(self, msg: str) -> None:
        """Already-valid headers are returned unchanged."""
        assert auto_correct_format(msg) == msg

    def test_unrelated_text_unchanged(self) -> None:
        """Text that is not a commit header is returned unchanged."""
        assert auto_correct_format("some random text") == "some random text"

    def test_empty_string_unchanged(self) -> None:
        """The empty string is returned unchanged."""
        assert auto_correct_format("") == ""

    # --- end-to-end through extract_commit_message ---

    def test_bare_deps_extractable(self) -> None:
        """bare deps: must produce a valid extractable message."""
        extracted = extract_commit_message("deps: 🔧 Rebuild simulator debug modules")
        assert extracted.startswith("deps-upgrade:")

    def test_corrections_table_is_open_for_extension(self) -> None:
        """_CORRECTIONS is a list — new entries can be appended without modifying the function."""
        from auto_commit_service.pipeline.format_utils import _CORRECTIONS

        assert isinstance(_CORRECTIONS, list)
        assert len(_CORRECTIONS) >= 3