diff --git a/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc b/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc index dd26ac0..f434fee 100644 Binary files a/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc and b/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc differ diff --git a/src/auto_commit_service/git/operations.py b/src/auto_commit_service/git/operations.py index e5a0df4..73b28e0 100644 --- a/src/auto_commit_service/git/operations.py +++ b/src/auto_commit_service/git/operations.py @@ -166,64 +166,68 @@ async def git_add_all(repo_path: Path) -> None: async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]: - """Check which files are ignored by .gitignore, including tracked files. + """Check which files git will refuse to stage due to .gitignore. + + Uses `git add --dry-run` to detect files that will fail to stage, + including tracked files now covered by .gitignore patterns. Args: repo_path: Path to the repository files: List of file paths to check Returns: - List of files that are NOT ignored (safe to stage) + List of files that are safe to stage (not ignored) """ if not files: return [] try: - # CRITICAL: git check-ignore won't flag tracked files, even if now in .gitignore - # Use `ls-files -i --exclude-standard` to find tracked-but-ignored files - file_list = "\n".join(files) - - # Check for tracked-but-ignored files (batch to avoid ARG_MAX) - tracked_ignored = set() + # CRITICAL: The only reliable way to detect stageable files is to try staging them + # with --dry-run. Neither check-ignore nor ls-files -i work for tracked-but-ignored files. + # Batch to avoid ARG_MAX BATCH_SIZE = 1000 - if len(files) > BATCH_SIZE: - for i in range(0, len(files), BATCH_SIZE): - batch = files[i : i + BATCH_SIZE] - stdout, _, _ = await _run_git_command( - "ls-files", "-i", "--exclude-standard", "--", - *batch, - cwd=repo_path, - check=False, - ) - if stdout.strip(): - tracked_ignored.update(stdout.strip().split("\n")) - else: - tracked_ignored_stdout, _, _ = await _run_git_command( - "ls-files", "-i", "--exclude-standard", "--", - *files, + safe_files = [] + ignored_files = [] + + batches = [files[i : i + BATCH_SIZE] for i in range(0, len(files), BATCH_SIZE)] + + for batch in batches: + # Try dry-run add - exit code 0 = success, 128 = some files ignored + stdout, stderr, returncode = await _run_git_command( + "add", "--dry-run", "--", + *batch, cwd=repo_path, check=False, ) - if tracked_ignored_stdout.strip(): - tracked_ignored = set(tracked_ignored_stdout.strip().split("\n")) - # Also check for untracked/new ignored files (uses stdin, no ARG_MAX issue) - untracked_ignored_stdout, _, _ = await _run_git_command( - "check-ignore", "--stdin", - cwd=repo_path, - check=False, - stdin=file_list.encode() - ) - untracked_ignored = set(untracked_ignored_stdout.strip().split("\n")) if untracked_ignored_stdout.strip() else set() + if returncode == 0: + # All files in batch are safe + safe_files.extend(batch) + elif returncode == 128 and "ignored" in stderr.lower(): + # Some files are ignored - parse error message to find which ones + # Git error format: "The following paths are ignored by one of your .gitignore files:\npath1\npath2" + lines = stderr.split("\n") + for i, line in enumerate(lines): + if "following paths are ignored" in line.lower(): + # Next lines until "hint:" are the ignored paths + for ignored_line in lines[i + 1:]: + if ignored_line.startswith("hint:") or not ignored_line.strip(): + break + ignored_path = ignored_line.strip() + if ignored_path: + ignored_files.append(ignored_path) - # Combine both sets of ignored files - all_ignored = tracked_ignored | untracked_ignored + # Files not in ignored list are safe + safe_files.extend([f for f in batch if f not in ignored_files and not any(f.startswith(ig + "/") for ig in ignored_files)]) + else: + # Unknown error - be conservative and include all files + logger.warning(f"git add --dry-run returned {returncode}: {stderr}, including all files from batch") + safe_files.extend(batch) - if all_ignored: - logger.info(f"Filtered {len(all_ignored)} gitignored files ({len(tracked_ignored)} tracked-but-ignored, {len(untracked_ignored)} new)") + if ignored_files: + logger.info(f"Filtered {len(ignored_files)} gitignored files (detected via dry-run)") - # Return only non-ignored files - return [f for f in files if f not in all_ignored] + return safe_files except GitError as e: # If check fails, return all files (safer than blocking commits)