auto-commit-service/tests/test_format_utils.py

482 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for commit message formatting utilities.
Covers sanitize_scope (emoji stripping, normalization) and correct_emoji
(enforcing canonical gitmoji from the GITMOJI_MAP table).
"""
import pytest
from auto_commit_service.pipeline.format_utils import (
auto_correct_format,
correct_emoji,
extract_commit_message,
sanitize_message,
sanitize_message_scope,
sanitize_scope,
)
from auto_commit_service.pipeline.gitmoji import GITMOJI_MAP
from auto_commit_service.llm.validator import validate_commit_message
class TestSanitizeScope:
    """Checks that ``sanitize_scope`` reduces raw scope text to a clean identifier.

    The cases cover emoji stripping, parenthesis handling, stop-word
    filtering, whitespace-to-hyphen normalization and length limits.
    """

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("auth", "auth"),
            ("api-routes", "api-routes"),
            ("pipeline.stages", "pipeline.stages"),
            ("core_module", "core_module"),
        ],
    )
    def test_passthrough_clean_scopes(self, raw: str, expected: str) -> None:
        """Scopes that are already clean come back unchanged."""
        assert sanitize_scope(raw) == expected

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("✨ auth", "auth"),
            ("🔧config", "config"),
            ("🐛 api-routes", "api-routes"),
            ("✨🎉 new-feature", "feature"),  # "new" is a stop word
            ("⚡ perf", "perf"),
        ],
    )
    def test_strips_emoji(self, raw: str, expected: str) -> None:
        """Leading emoji are removed, with or without a separating space."""
        assert sanitize_scope(raw) == expected

    def test_strips_parentheses(self) -> None:
        """Parenthesised free text must not leak the stop word "with"."""
        result = sanitize_scope("core(with react integration)")
        # "with" is a stop word, so only meaningful tokens remain
        assert "with" not in result.split("-")
        assert result  # Should still have some content

    def test_emoji_only_returns_empty(self) -> None:
        """A scope consisting solely of emoji leaves nothing behind."""
        # The input must actually contain emoji; passing "" here would only
        # repeat test_empty_input and leave the emoji-only path unexercised.
        assert sanitize_scope("✨🎉") == ""

    def test_empty_input(self) -> None:
        """The empty string maps to the empty string."""
        assert sanitize_scope("") == ""

    def test_truncates_long_scope(self) -> None:
        """Without an explicit limit the result is at most 25 characters."""
        result = sanitize_scope("this is a really long scope value that exceeds the limit")
        assert len(result) <= 25

    def test_custom_max_length(self) -> None:
        """``max_length`` caps the length of the returned scope."""
        result = sanitize_scope("authentication", max_length=10)
        assert len(result) <= 10

    def test_collapses_whitespace_to_hyphens(self) -> None:
        """Whitespace between tokens becomes a single hyphen."""
        assert sanitize_scope("api routes") == "api-routes"

    # --- New tests for stop word filtering ---

    def test_strips_stop_words_from_natural_language(self) -> None:
        """Stop words in space-separated phrases are dropped."""
        assert sanitize_scope("the primary auth module") == "auth"
        assert sanitize_scope("for the config settings") == "config-settings"

    def test_strips_stop_words_from_hyphenated(self) -> None:
        """Stop words are also dropped from hyphen-joined input."""
        assert sanitize_scope("theme-primary-as-its-the") == "theme"
        assert sanitize_scope("ui-tiers-the-package-name-as") == "ui-tiers"
        assert sanitize_scope("config-settings-for-the-main") == "config-settings"

    def test_empty_after_all_stop_words(self) -> None:
        """Input made only of stop words collapses to the empty string."""
        assert sanitize_scope("the a an is") == ""

    def test_rejects_module_file_package_words(self) -> None:
        """Filler nouns such as "module" and "file" are removed."""
        assert sanitize_scope("auth module") == "auth"
        assert sanitize_scope("config file") == "config"
class TestCorrectEmoji:
    """Verifies that ``correct_emoji`` puts the GITMOJI_MAP emoji for the commit type in place."""

    @pytest.mark.parametrize(
        ("message", "expected"),
        [
            # Each input carries an emoji that does not belong to its type.
            ("feat(auth): 🔧 Add login", "feat(auth): ✨ Add login"),
            ("fix(api): ✨ Resolve timeout", "fix(api): 🐛 Resolve timeout"),
            ("chore(config): 🐛 Update rules", "chore(config): 🔧 Update rules"),
            ("perf(db): 🔧 Optimize queries", "perf(db): ⚡ Optimize queries"),
            ("test(auth): 🔧 Add unit tests", "test(auth): ✅ Add unit tests"),
            ("refactor(core): 🔧 Extract utils", "refactor(core): ♻️ Extract utils"),
            ("docs(readme): 🔧 Add setup guide", "docs(readme): 📝 Add setup guide"),
        ],
    )
    def test_corrects_wrong_emoji(self, message: str, expected: str) -> None:
        """A mismatched emoji is replaced by the one expected for the type."""
        corrected = correct_emoji(message)
        assert corrected == expected

    @pytest.mark.parametrize(
        "message",
        [
            "feat(auth): ✨ Add login",
            "fix(api): 🐛 Resolve timeout",
            "chore(config): 🔧 Update ESLint rules",
            "refactor(core): ♻️ Extract validation logic",
            "docs(readme): 📝 Add installation guide",
            "test(auth): ✅ Add integration tests",
            "perf(query): ⚡ Optimize database lookup",
        ],
    )
    def test_preserves_correct_emoji(self, message: str) -> None:
        """Messages that already use the right emoji are returned unchanged."""
        unchanged = correct_emoji(message)
        assert unchanged == message

    def test_handles_no_scope(self) -> None:
        """A scope-less message with the right emoji passes through."""
        scopeless = "docs: 📝 Update README"
        assert correct_emoji(scopeless) == scopeless

    def test_passthrough_non_commit_format(self) -> None:
        """Text that is not a commit header, including "", is left alone."""
        for text in ("random text here", ""):
            assert correct_emoji(text) == text

    @pytest.mark.parametrize("commit_type", list(GITMOJI_MAP.keys()))
    def test_all_gitmoji_types_have_correction(self, commit_type: str) -> None:
        """Each GITMOJI_MAP type replaces a placeholder emoji with its own."""
        canonical = GITMOJI_MAP[commit_type]
        subject = "Do something useful here"

        result = correct_emoji(f"{commit_type}(test): 🤖 {subject}")
        wanted = f"{commit_type}(test): {canonical} {subject}"

        assert result == wanted, (
            f"Type '{commit_type}' should produce {canonical}, got: {result}"
        )
class TestValidatorAcceptsAllGitmojiTypes:
    """Every commit type listed in GITMOJI_MAP must pass ``validate_commit_message``."""

    @pytest.mark.parametrize("commit_type", list(GITMOJI_MAP.keys()))
    def test_validator_recognizes_type(self, commit_type: str) -> None:
        """Build a message for ``commit_type`` with its mapped emoji and validate it.

        ``validate_commit_message`` is taken from the module-level import, so
        no function-local re-import is needed.
        """
        emoji = GITMOJI_MAP[commit_type]
        message = f"{commit_type}(core): {emoji} add meaningful feature implementation"
        result = validate_commit_message(message)
        assert result.valid, (
            f"Type '{commit_type}' should be valid, violations: {result.violations}"
        )
class TestFormatStageValidatorRetry:
    """Exercises the single validator-driven retry in FormatCommitMessageStage."""

    # Dotted path patched in each async test to inject the mocked LLM client.
    _LLM_CLIENT_PATH = "auto_commit_service.pipeline.init.get_llm_client"

    @pytest.fixture
    def analysis(self):
        """Provide a chore-type CommitAnalysis covering one config file."""
        from auto_commit_service.pipeline.models import CommitAnalysis

        fields = {
            "files": ["src/config.py"],
            "change_type": "chore",
            "scope": "config",
            "reasoning": "Updated config settings",
            "impact_summary": "Configuration cleanup",
            "suggested_description": "Refactor settings for clarity",
        }
        return CommitAnalysis(**fields)

    @pytest.fixture
    def format_stage(self):
        """Provide a fresh FormatCommitMessageStage built with no arguments."""
        from auto_commit_service.pipeline.stages.format import FormatCommitMessageStage

        stage = FormatCommitMessageStage()
        return stage

    @pytest.mark.asyncio
    async def test_valid_first_attempt_no_retry(self, format_stage, analysis) -> None:
        """A first response that passes validation is used with exactly one LLM call."""
        from unittest.mock import AsyncMock, patch

        accepted = "chore(config): 🔧 refactor settings for service isolation"
        llm = AsyncMock()
        llm.format_commit_message = AsyncMock(return_value=accepted)

        with patch(self._LLM_CLIENT_PATH, return_value=llm):
            outcome = await format_stage._format_commit_message(analysis)

        assert outcome.message == accepted
        llm.format_commit_message.assert_called_once()

    @pytest.mark.asyncio
    async def test_retry_on_invalid_first_attempt(self, format_stage, analysis) -> None:
        """A rejected first response triggers a second call whose result is used."""
        from unittest.mock import AsyncMock, patch

        rejected = "chore(config): 🔧 Update 5 py files"
        accepted = "chore(config): 🔧 refactor settings for service isolation"
        llm = AsyncMock()
        llm.format_commit_message = AsyncMock(side_effect=[rejected, accepted])

        with patch(self._LLM_CLIENT_PATH, return_value=llm):
            outcome = await format_stage._format_commit_message(analysis)

        assert outcome.message == accepted
        assert llm.format_commit_message.call_count == 2

    @pytest.mark.asyncio
    async def test_uses_retry_when_both_fail(self, format_stage, analysis) -> None:
        """If both responses are rejected, the second one is still the result."""
        from unittest.mock import AsyncMock, patch

        first_reject = "chore(config): 🔧 Update 5 py files"
        second_reject = "chore(config): 🔧 Update configuration files"
        llm = AsyncMock()
        llm.format_commit_message = AsyncMock(
            side_effect=[first_reject, second_reject]
        )

        with patch(self._LLM_CLIENT_PATH, return_value=llm):
            outcome = await format_stage._format_commit_message(analysis)

        # The retry output wins even though it did not pass validation either.
        assert outcome.message == second_reject
        assert llm.format_commit_message.call_count == 2

    def test_retry_prompt_includes_violations(self, format_stage, analysis) -> None:
        """The retry prompt echoes the rejected message, violations and analysis text."""
        rejected = "chore(config): 🔧 Update 5 py files"
        violations = [
            "Contains banned phrase matching: ^update\\s+\\d+\\s+(files?|py\\s+files?)",
            "Missing action verb",
        ]

        prompt = format_stage._build_retry_prompt(analysis, rejected, violations)

        required_fragments = (
            "rejected by quality checks",
            "Update 5 py files",
            "banned phrase",
            "Missing action verb",
            analysis.impact_summary,
            analysis.suggested_description,
        )
        for fragment in required_fragments:
            assert fragment in prompt
class TestSanitizeMessageScope:
    """Checks how ``sanitize_message_scope`` cleans the scope inside a full message."""

    def test_cleans_reasoning_leak_in_scope(self) -> None:
        """Natural-language filler in the scope is reduced to the key token."""
        leaked = "feat(the primary auth module): ✨ Add login"
        assert sanitize_message_scope(leaked) == "feat(auth): ✨ Add login"

    def test_cleans_hyphenated_reasoning_in_scope(self) -> None:
        """Hyphen-joined filler in the scope is reduced the same way."""
        leaked = "deps-upgrade(theme-primary-as-its-the): ⬆️ Bump vite"
        cleaned = sanitize_message_scope(leaked)
        assert cleaned == "deps-upgrade(theme): ⬆️ Bump vite"

    def test_preserves_clean_scope(self) -> None:
        """A single-word scope is left as it is."""
        original = "feat(auth): ✨ Add OAuth2 login support"
        assert sanitize_message_scope(original) == original

    def test_preserves_clean_kebab_scope(self) -> None:
        """A kebab-case scope is left as it is."""
        original = "refactor(pipeline-stages): ♻️ Extract shared logic"
        assert sanitize_message_scope(original) == original

    def test_removes_scope_if_empty_after_sanitization(self) -> None:
        """When nothing survives sanitization the parentheses disappear too."""
        only_stop_words = "chore(the a an is): 🔧 Update something"
        cleaned = sanitize_message_scope(only_stop_words)
        assert cleaned == "chore: 🔧 Update something"

    def test_no_scope_message_unchanged(self) -> None:
        """A message without any scope passes through untouched."""
        original = "chore: 🔧 Update something"
        assert sanitize_message_scope(original) == original

    def test_non_matching_message_unchanged(self) -> None:
        """Text that is not a commit header passes through untouched."""
        original = "not a commit message"
        assert sanitize_message_scope(original) == original
class TestAutoCorrectFormatSeparators:
    """Tests for auto_correct_format — fixes slash and dash separators.

    The class name is deliberately distinct from the other
    ``auto_correct_format`` test class in this module. Two classes sharing
    one name rebind the same module attribute, and pytest then collects only
    the later definition, silently skipping every test in the earlier one.
    """

    def test_fixes_slash_format(self) -> None:
        """``type/scope:`` is rewritten as ``type(scope):``."""
        result = auto_correct_format("feat/auth: ✨ Add login")
        assert result == "feat(auth): ✨ Add login"

    def test_fixes_dash_format(self) -> None:
        """``type-scope:`` is rewritten as ``type(scope):``."""
        result = auto_correct_format("fix-api: 🐛 Fix timeout")
        assert result == "fix(api): 🐛 Fix timeout"

    def test_preserves_correct_format(self) -> None:
        """A well-formed header is returned unchanged."""
        msg = "feat(auth): ✨ Add login"
        assert auto_correct_format(msg) == msg

    def test_preserves_non_commit_line(self) -> None:
        """Ordinary prose is returned unchanged."""
        msg = "This is just a regular line"
        assert auto_correct_format(msg) == msg
class TestExtractCommitMessage:
    """Checks that ``extract_commit_message`` pulls the commit line out of LLM output."""

    def test_extracts_from_single_line(self) -> None:
        """A response that is just the commit line is returned as-is."""
        only_line = "feat(auth): ✨ Add OAuth2 login support"
        assert extract_commit_message(only_line) == only_line

    def test_extracts_from_multiline_with_reasoning(self) -> None:
        """The commit line is found between surrounding explanatory lines."""
        commit_line = "feat(auth): ✨ Add OAuth2 login support"
        response = "\n".join(
            [
                "Let me analyze the changes.",
                "Based on the file changes, this is a feature addition.",
                commit_line,
                "This adds the login endpoint.",
            ]
        )
        assert extract_commit_message(response) == commit_line

    def test_skips_reasoning_lines(self) -> None:
        """A commit-shaped line carrying a reasoning marker is passed over."""
        commit_line = "feat(auth): ✨ Add OAuth2 login support"
        response = "\n".join(
            [
                "feat(auth): ✨ **Reasoning:** Based on analysis",
                commit_line,
            ]
        )
        assert extract_commit_message(response) == commit_line

    def test_auto_corrects_slash_in_response(self) -> None:
        """A slash-separated scope is converted to parentheses on extraction."""
        extracted = extract_commit_message("feat/auth: ✨ Add OAuth2 login support")
        assert extracted == "feat(auth): ✨ Add OAuth2 login support"

    def test_raises_on_no_valid_message(self) -> None:
        """Text with no commit-shaped line raises RuntimeError."""
        unusable = "Just some random text with no commit format"
        with pytest.raises(RuntimeError, match="Failed to extract"):
            extract_commit_message(unusable)
class TestSanitizeMessage:
    """Tests for sanitize_message — removes markdown artifacts."""

    def test_removes_bold_markers(self) -> None:
        """Markdown bold markers around the type are stripped."""
        assert sanitize_message("**feat**: add login") == "feat: add login"

    def test_removes_code_markers(self) -> None:
        """Backtick code markers around the type are stripped."""
        assert sanitize_message("`feat`: add login") == "feat: add login"

    def test_collapses_multiple_spaces(self) -> None:
        """Runs of spaces between words are reduced to a single space."""
        # The input must contain repeated spaces; a single-spaced input equals
        # the expected output and would pass even with no collapsing at all.
        assert sanitize_message("feat:   add    login") == "feat: add login"
class TestValidatorScopeChecks:
    """Checks the scope rules applied by ``validate_commit_message``."""

    def test_rejects_generic_src_scope(self) -> None:
        """``src`` is rejected and a violation mentions "generic"."""
        verdict = validate_commit_message("feat(src): ✨ Add new feature")
        assert not verdict.valid
        lowered = [violation.lower() for violation in verdict.violations]
        assert any("generic" in text for text in lowered)

    def test_rejects_generic_lib_scope(self) -> None:
        """``lib`` is rejected as a scope."""
        verdict = validate_commit_message("feat(lib): ✨ Add new feature")
        assert not verdict.valid

    def test_rejects_generic_app_scope(self) -> None:
        """``app`` is rejected as a scope."""
        verdict = validate_commit_message("feat(app): ✨ Add new feature")
        assert not verdict.valid

    def test_accepts_specific_scope(self) -> None:
        """A specific single-word scope is accepted."""
        verdict = validate_commit_message("feat(auth): ✨ Add OAuth2 login support")
        assert verdict.valid

    def test_accepts_kebab_scope(self) -> None:
        """A kebab-case scope is accepted."""
        message = "refactor(api-routes): ♻️ Extract shared validation logic"
        verdict = validate_commit_message(message)
        assert verdict.valid

    def test_rejects_scope_with_spaces(self) -> None:
        """A scope containing a space is rejected with a "spaces" violation."""
        verdict = validate_commit_message("feat(auth module): ✨ Add login support")
        assert not verdict.valid
        lowered = [violation.lower() for violation in verdict.violations]
        assert any("spaces" in text for text in lowered)

    def test_rejects_overly_long_scope(self) -> None:
        """A 30-character scope is rejected with a "long" violation."""
        verdict = validate_commit_message(f"feat({'a' * 30}): ✨ Add new feature")
        assert not verdict.valid
        lowered = [violation.lower() for violation in verdict.violations]
        assert any("long" in text for text in lowered)
class TestAutoCorrectFormat:
    """Checks the rewrites ``auto_correct_format`` performs via its _CORRECTIONS table."""

    # --- bare deps normalization ---

    def test_bare_deps_no_scope(self) -> None:
        """A bare ``deps:`` prefix becomes ``deps-upgrade:``."""
        fixed = auto_correct_format("deps: 🔧 Rebuild debug dependencies")
        assert fixed.startswith("deps-upgrade:")

    def test_bare_deps_with_scope(self) -> None:
        """A bare ``deps(scope):`` prefix keeps its scope under ``deps-upgrade``."""
        fixed = auto_correct_format("deps(simulator): 🔧 Rebuild things")
        assert fixed.startswith("deps-upgrade(simulator):")

    def test_bare_deps_actual_failing_log_message(self) -> None:
        """Reproduces the exact message from the activity log that was failing."""
        logged = (
            "deps: 🔧 Rebuild debug dependencies for simulator modules "
            "(physics, AI, city, climate, combat, core)"
        )
        fixed = auto_correct_format(logged)
        assert fixed.startswith("deps-upgrade:")
        assert "Rebuild debug dependencies" in fixed

    def test_deps_upgrade_passes_through_unchanged(self) -> None:
        """Valid deps-upgrade must not be mangled."""
        already_valid = "deps-upgrade(api): ⬆️ bump httpx to 0.27"
        assert auto_correct_format(already_valid) == already_valid

    def test_deps_upgrade_with_scope_passes_through(self) -> None:
        """A second valid ``deps-upgrade`` example also stays intact."""
        already_valid = "deps-upgrade(config): ⬆️ Update dependency versions"
        assert auto_correct_format(already_valid) == already_valid

    def test_deps_add_passes_through_unchanged(self) -> None:
        """``deps-add`` is not rewritten."""
        already_valid = "deps-add(core): add aiohttp"
        assert auto_correct_format(already_valid) == already_valid

    # --- slash scope correction ---

    def test_slash_scope_corrected(self) -> None:
        """``type/scope:`` turns into ``type(scope):``."""
        fixed = auto_correct_format("feat/auth: ✨ Add login")
        assert fixed == "feat(auth): ✨ Add login"

    def test_slash_scope_with_hyphen(self) -> None:
        """A hyphenated scope after a slash is preserved inside the parentheses."""
        fixed = auto_correct_format("fix/api-routes: 🐛 Fix timeout")
        assert fixed == "fix(api-routes): 🐛 Fix timeout"

    # --- dash scope correction ---

    def test_dash_scope_corrected(self) -> None:
        """``type-scope:`` turns into ``type(scope):``."""
        fixed = auto_correct_format("chore-config: 🔧 Update rules")
        assert fixed == "chore(config): 🔧 Update rules"

    # --- passthrough for valid messages ---

    @pytest.mark.parametrize(
        "msg",
        [
            "feat(auth): ✨ Add OAuth2",
            "fix(api): 🐛 Resolve null pointer",
            "chore(config): 🔧 Update ESLint rules",
            "refactor(core): ♻️ Extract shared logic",
            "test(auth): ✅ Add integration tests",
        ],
    )
    def test_valid_messages_unchanged(self, msg: str) -> None:
        """Well-formed headers are returned exactly as given."""
        untouched = auto_correct_format(msg)
        assert untouched == msg

    def test_unrelated_text_unchanged(self) -> None:
        """Plain text that is not a commit header is returned as given."""
        plain = "some random text"
        assert auto_correct_format(plain) == plain

    def test_empty_string_unchanged(self) -> None:
        """The empty string is returned as the empty string."""
        assert auto_correct_format("") == ""

    # --- end-to-end through extract_commit_message ---

    def test_bare_deps_extractable(self) -> None:
        """bare deps: must produce a valid extractable message."""
        extracted = extract_commit_message("deps: 🔧 Rebuild simulator debug modules")
        assert extracted.startswith("deps-upgrade:")

    def test_corrections_table_is_open_for_extension(self) -> None:
        """_CORRECTIONS is a list — new entries can be appended without modifying the function."""
        from auto_commit_service.pipeline.format_utils import _CORRECTIONS

        table = _CORRECTIONS
        assert isinstance(table, list)
        assert len(table) >= 3