fix(git): detect tracked-but-ignored files with ls-files -i

- Use `git ls-files -i --exclude-standard` to find tracked files now in .gitignore
- Combine with `git check-ignore` for comprehensive ignore detection
- Batch ls-files calls (1000 files/batch) to avoid exceeding the ARG_MAX argument-length limit
- Prevents "paths are ignored" errors for legacy tracked files (apps/, packages/, test-output/)

PROBLEM: `git check-ignore` alone doesn't flag already-tracked files
SOLUTION: Two-phase detection (tracked-ignored + new-ignored)

Fixes egirl-platform failures: 5,175 tracked-but-ignored files filtered

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Lilith 2026-01-11 00:08:05 -08:00
parent bbd8220232
commit f302e8366a
2 changed files with 40 additions and 12 deletions

View file

@ -166,7 +166,7 @@ async def git_add_all(repo_path: Path) -> None:
async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]:
"""Check which files are ignored by .gitignore.
"""Check which files are ignored by .gitignore, including tracked files.
Args:
repo_path: Path to the repository
@ -179,27 +179,55 @@ async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]:
return []
try:
# Use git check-ignore to filter out ignored files
# --stdin allows us to check multiple files efficiently
# -v flag would show matches, but we want non-matches
# Exit code 0 = files are ignored, 1 = files are NOT ignored
# CRITICAL: git check-ignore won't flag tracked files, even if now in .gitignore
# Use `ls-files -i --exclude-standard` to find tracked-but-ignored files
file_list = "\n".join(files)
stdout, stderr, returncode = await _run_git_command(
# Check for tracked-but-ignored files (batch to avoid ARG_MAX)
tracked_ignored = set()
BATCH_SIZE = 1000
if len(files) > BATCH_SIZE:
for i in range(0, len(files), BATCH_SIZE):
batch = files[i : i + BATCH_SIZE]
stdout, _, _ = await _run_git_command(
"ls-files", "-i", "--exclude-standard", "--",
*batch,
cwd=repo_path,
check=False,
)
if stdout.strip():
tracked_ignored.update(stdout.strip().split("\n"))
else:
tracked_ignored_stdout, _, _ = await _run_git_command(
"ls-files", "-i", "--exclude-standard", "--",
*files,
cwd=repo_path,
check=False,
)
if tracked_ignored_stdout.strip():
tracked_ignored = set(tracked_ignored_stdout.strip().split("\n"))
# Also check for untracked/new ignored files (uses stdin, no ARG_MAX issue)
untracked_ignored_stdout, _, _ = await _run_git_command(
"check-ignore", "--stdin",
cwd=repo_path,
check=False,
stdin=file_list.encode()
)
untracked_ignored = set(untracked_ignored_stdout.strip().split("\n")) if untracked_ignored_stdout.strip() else set()
# Files in stdout are ignored - we want to exclude these
ignored_files = set(stdout.strip().split("\n")) if stdout.strip() else set()
# Combine both sets of ignored files
all_ignored = tracked_ignored | untracked_ignored
if all_ignored:
logger.info(f"Filtered {len(all_ignored)} gitignored files ({len(tracked_ignored)} tracked-but-ignored, {len(untracked_ignored)} new)")
# Return only non-ignored files
return [f for f in files if f not in ignored_files]
return [f for f in files if f not in all_ignored]
except GitError:
# If check-ignore fails, return all files (safer than blocking commits)
logger.warning(f"git check-ignore failed in {repo_path}, proceeding without filter")
except GitError as e:
# If check fails, return all files (safer than blocking commits)
logger.warning(f"git ignore check failed in {repo_path}: {e}, proceeding without filter")
return files