"""Tests for commit message formatting utilities.

Covers sanitize_scope (emoji stripping, normalization) and correct_emoji
(enforcing canonical gitmoji from the GITMOJI_MAP table), plus the related
helpers exercised below: sanitize_message_scope, auto_correct_format,
extract_commit_message, sanitize_message, validate_commit_message and the
validator-driven retry of FormatCommitMessageStage.
"""

from unittest.mock import AsyncMock, patch

import pytest

from auto_commit_service.pipeline.format_utils import (
    auto_correct_format,
    correct_emoji,
    extract_commit_message,
    sanitize_message,
    sanitize_message_scope,
    sanitize_scope,
)
from auto_commit_service.pipeline.gitmoji import GITMOJI_MAP
from auto_commit_service.llm.validator import validate_commit_message


class TestSanitizeScope:
    """Scope values must be plain ASCII identifiers — no emoji, no parens."""

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("auth", "auth"),
            ("api-routes", "api-routes"),
            ("pipeline.stages", "pipeline.stages"),
            ("core_module", "core_module"),
        ],
    )
    def test_passthrough_clean_scopes(self, raw: str, expected: str) -> None:
        assert sanitize_scope(raw) == expected

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("✨ auth", "auth"),
            ("🔧config", "config"),
            ("🐛 api-routes", "api-routes"),
            ("✨🎉 new-feature", "feature"),  # "new" is a stop word
            ("⚡ perf", "perf"),
        ],
    )
    def test_strips_emoji(self, raw: str, expected: str) -> None:
        assert sanitize_scope(raw) == expected

    def test_strips_parentheses(self) -> None:
        # "with" is a stop word, so only meaningful tokens remain
        result = sanitize_scope("core(with react integration)")
        assert "with" not in result.split("-")
        assert result  # Should still have some content

    def test_emoji_only_returns_empty(self) -> None:
        assert sanitize_scope("✨") == ""

    def test_empty_input(self) -> None:
        assert sanitize_scope("") == ""

    def test_truncates_long_scope(self) -> None:
        result = sanitize_scope(
            "this is a really long scope value that exceeds the limit"
        )
        assert len(result) <= 25

    def test_custom_max_length(self) -> None:
        result = sanitize_scope("authentication", max_length=10)
        assert len(result) <= 10

    def test_collapses_whitespace_to_hyphens(self) -> None:
        assert sanitize_scope("api routes") == "api-routes"

    # --- New tests for stop word filtering ---

    def test_strips_stop_words_from_natural_language(self) -> None:
        assert sanitize_scope("the primary auth module") == "auth"
        assert sanitize_scope("for the config settings") == "config-settings"

    def test_strips_stop_words_from_hyphenated(self) -> None:
        assert sanitize_scope("theme-primary-as-its-the") == "theme"
        assert sanitize_scope("ui-tiers-the-package-name-as") == "ui-tiers"
        assert sanitize_scope("config-settings-for-the-main") == "config-settings"

    def test_empty_after_all_stop_words(self) -> None:
        assert sanitize_scope("the a an is") == ""

    def test_rejects_module_file_package_words(self) -> None:
        assert sanitize_scope("auth module") == "auth"
        assert sanitize_scope("config file") == "config"


class TestCorrectEmoji:
    """correct_emoji enforces the canonical gitmoji from GITMOJI_MAP."""

    @pytest.mark.parametrize(
        ("message", "expected"),
        [
            # Wrong emoji → corrected
            ("feat(auth): 🔧 Add login", "feat(auth): ✨ Add login"),
            ("fix(api): ✨ Resolve timeout", "fix(api): 🐛 Resolve timeout"),
            ("chore(config): 🐛 Update rules", "chore(config): 🔧 Update rules"),
            ("perf(db): 🔧 Optimize queries", "perf(db): ⚡ Optimize queries"),
            ("test(auth): 🔧 Add unit tests", "test(auth): ✅ Add unit tests"),
            ("refactor(core): 🔧 Extract utils", "refactor(core): ♻️ Extract utils"),
            ("docs(readme): 🔧 Add setup guide", "docs(readme): 📝 Add setup guide"),
        ],
    )
    def test_corrects_wrong_emoji(self, message: str, expected: str) -> None:
        assert correct_emoji(message) == expected

    @pytest.mark.parametrize(
        "message",
        [
            "feat(auth): ✨ Add login",
            "fix(api): 🐛 Resolve timeout",
            "chore(config): 🔧 Update ESLint rules",
            "refactor(core): ♻️ Extract validation logic",
            "docs(readme): 📝 Add installation guide",
            "test(auth): ✅ Add integration tests",
            "perf(query): ⚡ Optimize database lookup",
        ],
    )
    def test_preserves_correct_emoji(self, message: str) -> None:
        assert correct_emoji(message) == message

    def test_handles_no_scope(self) -> None:
        assert correct_emoji("docs: 📝 Update README") == "docs: 📝 Update README"

    def test_passthrough_non_commit_format(self) -> None:
        assert correct_emoji("random text here") == "random text here"
        assert correct_emoji("") == ""

    @pytest.mark.parametrize("commit_type", list(GITMOJI_MAP))
    def test_all_gitmoji_types_have_correction(self, commit_type: str) -> None:
        """Every type in GITMOJI_MAP should produce the correct emoji."""
        wrong_emoji = "🤖"
        correct = GITMOJI_MAP[commit_type]
        message = f"{commit_type}(test): {wrong_emoji} Do something useful here"
        result = correct_emoji(message)
        assert result == f"{commit_type}(test): {correct} Do something useful here", (
            f"Type '{commit_type}' should produce {correct}, got: {result}"
        )


class TestValidatorAcceptsAllGitmojiTypes:
    """The validator must accept every commit type from the gitmoji table."""

    @pytest.mark.parametrize("commit_type", list(GITMOJI_MAP))
    def test_validator_recognizes_type(self, commit_type: str) -> None:
        emoji = GITMOJI_MAP[commit_type]
        message = f"{commit_type}(core): {emoji} add meaningful feature implementation"
        result = validate_commit_message(message)
        assert result.valid, (
            f"Type '{commit_type}' should be valid, violations: {result.violations}"
        )


class TestFormatStageValidatorRetry:
    """FormatCommitMessageStage retries LLM once when validator rejects the message."""

    @pytest.fixture
    def analysis(self):
        """Build the CommitAnalysis handed to the format stage in each test."""
        from auto_commit_service.pipeline.models import CommitAnalysis

        return CommitAnalysis(
            files=["src/config.py"],
            change_type="chore",
            scope="config",
            reasoning="Updated config settings",
            impact_summary="Configuration cleanup",
            suggested_description="Refactor settings for clarity",
        )

    @pytest.fixture
    def format_stage(self):
        """Create a fresh FormatCommitMessageStage for each test."""
        from auto_commit_service.pipeline.stages.format import FormatCommitMessageStage

        return FormatCommitMessageStage()

    @pytest.mark.asyncio
    async def test_valid_first_attempt_no_retry(self, format_stage, analysis) -> None:
        """When the first LLM response passes validation, no retry occurs."""
        good_response = "chore(config): 🔧 refactor settings for service isolation"
        mock_client = AsyncMock()
        mock_client.format_commit_message = AsyncMock(return_value=good_response)

        with patch(
            "auto_commit_service.pipeline.init.get_llm_client",
            return_value=mock_client,
        ):
            result = await format_stage._format_commit_message(analysis)

        assert result.message == "chore(config): 🔧 refactor settings for service isolation"
        mock_client.format_commit_message.assert_called_once()

    @pytest.mark.asyncio
    async def test_retry_on_invalid_first_attempt(self, format_stage, analysis) -> None:
        """When the first LLM response fails validation, retry with feedback."""
        garbage = "chore(config): 🔧 Update 5 py files"
        good_retry = "chore(config): 🔧 refactor settings for service isolation"
        mock_client = AsyncMock()
        mock_client.format_commit_message = AsyncMock(
            side_effect=[garbage, good_retry]
        )

        with patch(
            "auto_commit_service.pipeline.init.get_llm_client",
            return_value=mock_client,
        ):
            result = await format_stage._format_commit_message(analysis)

        assert result.message == "chore(config): 🔧 refactor settings for service isolation"
        assert mock_client.format_commit_message.call_count == 2

    @pytest.mark.asyncio
    async def test_uses_retry_when_both_fail(self, format_stage, analysis) -> None:
        """When both attempts fail validation, use the retry (usually better)."""
        garbage1 = "chore(config): 🔧 Update 5 py files"
        garbage2 = "chore(config): 🔧 Update configuration files"
        mock_client = AsyncMock()
        mock_client.format_commit_message = AsyncMock(
            side_effect=[garbage1, garbage2]
        )

        with patch(
            "auto_commit_service.pipeline.init.get_llm_client",
            return_value=mock_client,
        ):
            result = await format_stage._format_commit_message(analysis)

        # Should use the retry attempt even though it also failed
        assert result.message == "chore(config): 🔧 Update configuration files"
        assert mock_client.format_commit_message.call_count == 2

    def test_retry_prompt_includes_violations(self, format_stage, analysis) -> None:
        """The retry prompt must include the violation feedback."""
        violations = [
            "Contains banned phrase matching: ^update\\s+\\d+\\s+(files?|py\\s+files?)",
            "Missing action verb",
        ]
        prompt = format_stage._build_retry_prompt(
            analysis,
            "chore(config): 🔧 Update 5 py files",
            violations,
        )
        assert "rejected by quality checks" in prompt
        assert "Update 5 py files" in prompt
        assert "banned phrase" in prompt
        assert "Missing action verb" in prompt
        assert analysis.impact_summary in prompt
        assert analysis.suggested_description in prompt


class TestSanitizeMessageScope:
    """Tests for sanitize_message_scope — scope cleanup within formatted messages."""

    def test_cleans_reasoning_leak_in_scope(self) -> None:
        result = sanitize_message_scope("feat(the primary auth module): ✨ Add login")
        assert result == "feat(auth): ✨ Add login"

    def test_cleans_hyphenated_reasoning_in_scope(self) -> None:
        result = sanitize_message_scope(
            "deps-upgrade(theme-primary-as-its-the): ⬆️ Bump vite"
        )
        assert result == "deps-upgrade(theme): ⬆️ Bump vite"

    def test_preserves_clean_scope(self) -> None:
        msg = "feat(auth): ✨ Add OAuth2 login support"
        assert sanitize_message_scope(msg) == msg

    def test_preserves_clean_kebab_scope(self) -> None:
        msg = "refactor(pipeline-stages): ♻️ Extract shared logic"
        assert sanitize_message_scope(msg) == msg

    def test_removes_scope_if_empty_after_sanitization(self) -> None:
        result = sanitize_message_scope("chore(the a an is): 🔧 Update something")
        assert result == "chore: 🔧 Update something"

    def test_no_scope_message_unchanged(self) -> None:
        msg = "chore: 🔧 Update something"
        assert sanitize_message_scope(msg) == msg

    def test_non_matching_message_unchanged(self) -> None:
        msg = "not a commit message"
        assert sanitize_message_scope(msg) == msg


# Named distinctly from TestAutoCorrectFormat further down: two classes sharing
# one name rebind the same module attribute, so pytest would only collect the
# later definition and these separator tests would silently never run.
class TestAutoCorrectFormatSeparators:
    """Tests for auto_correct_format — fixes slash and dash separators."""

    def test_fixes_slash_format(self) -> None:
        result = auto_correct_format("feat/auth: ✨ Add login")
        assert result == "feat(auth): ✨ Add login"

    def test_fixes_dash_format(self) -> None:
        result = auto_correct_format("fix-api: 🐛 Fix timeout")
        assert result == "fix(api): 🐛 Fix timeout"

    def test_preserves_correct_format(self) -> None:
        msg = "feat(auth): ✨ Add login"
        assert auto_correct_format(msg) == msg

    def test_preserves_non_commit_line(self) -> None:
        msg = "This is just a regular line"
        assert auto_correct_format(msg) == msg


class TestExtractCommitMessage:
    """Tests for extract_commit_message — extracts from LLM response."""

    def test_extracts_from_single_line(self) -> None:
        response = "feat(auth): ✨ Add OAuth2 login support"
        assert extract_commit_message(response) == response

    def test_extracts_from_multiline_with_reasoning(self) -> None:
        # NOTE(review): the exact line breaks of this fixture were reconstructed;
        # the commit line is placed on its own line between reasoning text.
        response = (
            "Let me analyze the changes.\n"
            "Based on the file changes, this is a feature addition.\n"
            "\n"
            "feat(auth): ✨ Add OAuth2 login support\n"
            "\n"
            "This adds the login endpoint."
        )
        assert extract_commit_message(response) == "feat(auth): ✨ Add OAuth2 login support"

    def test_skips_reasoning_lines(self) -> None:
        # First line looks like a commit header but carries a reasoning marker;
        # the expected result is the second, genuine commit line.
        response = (
            "feat(auth): ✨ **Reasoning:** Based on analysis\n"
            "feat(auth): ✨ Add OAuth2 login support"
        )
        assert extract_commit_message(response) == "feat(auth): ✨ Add OAuth2 login support"

    def test_auto_corrects_slash_in_response(self) -> None:
        response = "feat/auth: ✨ Add OAuth2 login support"
        result = extract_commit_message(response)
        assert result == "feat(auth): ✨ Add OAuth2 login support"

    def test_raises_on_no_valid_message(self) -> None:
        with pytest.raises(RuntimeError, match="Failed to extract"):
            extract_commit_message("Just some random text with no commit format")


class TestSanitizeMessage:
    """Tests for sanitize_message — removes markdown artifacts."""

    def test_removes_bold_markers(self) -> None:
        assert sanitize_message("**feat**: add login") == "feat: add login"

    def test_removes_code_markers(self) -> None:
        assert sanitize_message("`feat`: add login") == "feat: add login"

    def test_collapses_multiple_spaces(self) -> None:
        # The input must actually contain runs of spaces, otherwise the test
        # passes without exercising any collapsing.
        assert sanitize_message("feat:   add    login") == "feat: add login"


class TestValidatorScopeChecks:
    """Tests for validate_commit_message scope validation."""

    def test_rejects_generic_src_scope(self) -> None:
        result = validate_commit_message("feat(src): ✨ Add new feature")
        assert not result.valid
        assert any("generic" in v.lower() for v in result.violations)

    def test_rejects_generic_lib_scope(self) -> None:
        result = validate_commit_message("feat(lib): ✨ Add new feature")
        assert not result.valid

    def test_rejects_generic_app_scope(self) -> None:
        result = validate_commit_message("feat(app): ✨ Add new feature")
        assert not result.valid

    def test_accepts_specific_scope(self) -> None:
        result = validate_commit_message("feat(auth): ✨ Add OAuth2 login support")
        assert result.valid

    def test_accepts_kebab_scope(self) -> None:
        result = validate_commit_message(
            "refactor(api-routes): ♻️ Extract shared validation logic"
        )
        assert result.valid

    def test_rejects_scope_with_spaces(self) -> None:
        result = validate_commit_message("feat(auth module): ✨ Add login support")
        assert not result.valid
        assert any("spaces" in v.lower() for v in result.violations)

    def test_rejects_overly_long_scope(self) -> None:
        long_scope = "a" * 30
        result = validate_commit_message(f"feat({long_scope}): ✨ Add new feature")
        assert not result.valid
        assert any("long" in v.lower() for v in result.violations)


class TestAutoCorrectFormat:
    """auto_correct_format applies _CORRECTIONS table entries in order."""

    # --- bare deps normalization ---

    def test_bare_deps_no_scope(self) -> None:
        result = auto_correct_format("deps: 🔧 Rebuild debug dependencies")
        assert result.startswith("deps-upgrade:")

    def test_bare_deps_with_scope(self) -> None:
        result = auto_correct_format("deps(simulator): 🔧 Rebuild things")
        assert result.startswith("deps-upgrade(simulator):")

    def test_bare_deps_actual_failing_log_message(self) -> None:
        """Reproduces the exact message from the activity log that was failing."""
        msg = (
            "deps: 🔧 Rebuild debug dependencies for simulator modules "
            "(physics, AI, city, climate, combat, core)"
        )
        result = auto_correct_format(msg)
        assert result.startswith("deps-upgrade:")
        assert "Rebuild debug dependencies" in result

    def test_deps_upgrade_passes_through_unchanged(self) -> None:
        """Valid deps-upgrade must not be mangled."""
        msg = "deps-upgrade(api): ⬆️ bump httpx to 0.27"
        assert auto_correct_format(msg) == msg

    def test_deps_upgrade_with_scope_passes_through(self) -> None:
        msg = "deps-upgrade(config): ⬆️ Update dependency versions"
        assert auto_correct_format(msg) == msg

    def test_deps_add_passes_through_unchanged(self) -> None:
        msg = "deps-add(core): ➕ add aiohttp"
        assert auto_correct_format(msg) == msg

    # --- slash scope correction ---

    def test_slash_scope_corrected(self) -> None:
        result = auto_correct_format("feat/auth: ✨ Add login")
        assert result == "feat(auth): ✨ Add login"

    def test_slash_scope_with_hyphen(self) -> None:
        result = auto_correct_format("fix/api-routes: 🐛 Fix timeout")
        assert result == "fix(api-routes): 🐛 Fix timeout"

    # --- dash scope correction ---

    def test_dash_scope_corrected(self) -> None:
        result = auto_correct_format("chore-config: 🔧 Update rules")
        assert result == "chore(config): 🔧 Update rules"

    # --- passthrough for valid messages ---

    @pytest.mark.parametrize(
        "msg",
        [
            "feat(auth): ✨ Add OAuth2",
            "fix(api): 🐛 Resolve null pointer",
            "chore(config): 🔧 Update ESLint rules",
            "refactor(core): ♻️ Extract shared logic",
            "test(auth): ✅ Add integration tests",
        ],
    )
    def test_valid_messages_unchanged(self, msg: str) -> None:
        assert auto_correct_format(msg) == msg

    def test_unrelated_text_unchanged(self) -> None:
        assert auto_correct_format("some random text") == "some random text"

    def test_empty_string_unchanged(self) -> None:
        assert auto_correct_format("") == ""

    # --- end-to-end through extract_commit_message ---

    def test_bare_deps_extractable(self) -> None:
        """bare deps: must produce a valid extractable message."""
        msg = "deps: 🔧 Rebuild simulator debug modules"
        result = extract_commit_message(msg)
        assert result.startswith("deps-upgrade:")

    def test_corrections_table_is_open_for_extension(self) -> None:
        """_CORRECTIONS is a list — new entries can be appended.

        Appending does not require modifying the function itself.
        """
        from auto_commit_service.pipeline.format_utils import _CORRECTIONS

        assert isinstance(_CORRECTIONS, list)
        assert len(_CORRECTIONS) >= 3