fix(git): use add --dry-run for reliable ignore detection

BREAKING CHANGE: git ls-files -i only works on staged files
BREAKING CHANGE: git check-ignore doesn't flag tracked-but-ignored files

SOLUTION: Use `git add --dry-run` to test which files will actually stage
- Detects all ignore cases: new files, tracked files, directory patterns
- Parse stderr to extract the list of ignored files when the exit code is 128
- Process files in batches (1000 files/batch) to avoid exceeding ARG_MAX

Fixes: @egirl/egirl-platform, which has 5,175 files under the ignored /apps/ and /packages/ directories

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Lilith 2026-01-11 00:31:00 -08:00
parent f302e8366a
commit 2473220592
2 changed files with 43 additions and 39 deletions

View file

@ -166,64 +166,68 @@ async def git_add_all(repo_path: Path) -> None:
async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]:
"""Check which files are ignored by .gitignore, including tracked files.
"""Check which files git will refuse to stage due to .gitignore.
Uses `git add --dry-run` to detect files that will fail to stage,
including tracked files now covered by .gitignore patterns.
Args:
repo_path: Path to the repository
files: List of file paths to check
Returns:
List of files that are NOT ignored (safe to stage)
List of files that are safe to stage (not ignored)
"""
if not files:
return []
try:
# CRITICAL: git check-ignore won't flag tracked files, even if now in .gitignore
# Use `ls-files -i --exclude-standard` to find tracked-but-ignored files
file_list = "\n".join(files)
# Check for tracked-but-ignored files (batch to avoid ARG_MAX)
tracked_ignored = set()
# CRITICAL: The only reliable way to detect stageable files is to try staging them
# with --dry-run. Neither check-ignore nor ls-files -i work for tracked-but-ignored files.
# Batch to avoid ARG_MAX
BATCH_SIZE = 1000
if len(files) > BATCH_SIZE:
for i in range(0, len(files), BATCH_SIZE):
batch = files[i : i + BATCH_SIZE]
stdout, _, _ = await _run_git_command(
"ls-files", "-i", "--exclude-standard", "--",
*batch,
cwd=repo_path,
check=False,
)
if stdout.strip():
tracked_ignored.update(stdout.strip().split("\n"))
else:
tracked_ignored_stdout, _, _ = await _run_git_command(
"ls-files", "-i", "--exclude-standard", "--",
*files,
safe_files = []
ignored_files = []
batches = [files[i : i + BATCH_SIZE] for i in range(0, len(files), BATCH_SIZE)]
for batch in batches:
# Try dry-run add - exit code 0 = success, 128 = some files ignored
stdout, stderr, returncode = await _run_git_command(
"add", "--dry-run", "--",
*batch,
cwd=repo_path,
check=False,
)
if tracked_ignored_stdout.strip():
tracked_ignored = set(tracked_ignored_stdout.strip().split("\n"))
# Also check for untracked/new ignored files (uses stdin, no ARG_MAX issue)
untracked_ignored_stdout, _, _ = await _run_git_command(
"check-ignore", "--stdin",
cwd=repo_path,
check=False,
stdin=file_list.encode()
)
untracked_ignored = set(untracked_ignored_stdout.strip().split("\n")) if untracked_ignored_stdout.strip() else set()
if returncode == 0:
# All files in batch are safe
safe_files.extend(batch)
elif returncode == 128 and "ignored" in stderr.lower():
# Some files are ignored - parse error message to find which ones
# Git error format: "The following paths are ignored by one of your .gitignore files:\npath1\npath2"
lines = stderr.split("\n")
for i, line in enumerate(lines):
if "following paths are ignored" in line.lower():
# Next lines until "hint:" are the ignored paths
for ignored_line in lines[i + 1:]:
if ignored_line.startswith("hint:") or not ignored_line.strip():
break
ignored_path = ignored_line.strip()
if ignored_path:
ignored_files.append(ignored_path)
# Combine both sets of ignored files
all_ignored = tracked_ignored | untracked_ignored
# Files not in ignored list are safe
safe_files.extend([f for f in batch if f not in ignored_files and not any(f.startswith(ig + "/") for ig in ignored_files)])
else:
# Unknown error - be conservative and include all files
logger.warning(f"git add --dry-run returned {returncode}: {stderr}, including all files from batch")
safe_files.extend(batch)
if all_ignored:
logger.info(f"Filtered {len(all_ignored)} gitignored files ({len(tracked_ignored)} tracked-but-ignored, {len(untracked_ignored)} new)")
if ignored_files:
logger.info(f"Filtered {len(ignored_files)} gitignored files (detected via dry-run)")
# Return only non-ignored files
return [f for f in files if f not in all_ignored]
return safe_files
except GitError as e:
# If check fails, return all files (safer than blocking commits)