fix(git): detect tracked-but-ignored files with ls-files -i
- Use `git ls-files -i --exclude-standard` to find tracked files now in .gitignore
- Combine with `git check-ignore` for comprehensive ignore detection
- Batch ls-files calls (1000 files/batch) to avoid ARG_MAX
- Prevents "paths are ignored" errors for legacy tracked files (apps/, packages/, test-output/)

BREAKING: git check-ignore alone doesn't flag already-tracked files
SOLUTION: Two-phase detection (tracked-ignored + new-ignored)

Fixes egirl-platform failures: 5,175 tracked-but-ignored files filtered

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
bbd8220232
commit
f302e8366a
2 changed files with 40 additions and 12 deletions
Binary file not shown.
|
|
@@ -166,7 +166,7 @@ async def git_add_all(repo_path: Path) -> None:
|
|||
|
||||
|
||||
async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]:
|
||||
"""Check which files are ignored by .gitignore.
|
||||
"""Check which files are ignored by .gitignore, including tracked files.
|
||||
|
||||
Args:
|
||||
repo_path: Path to the repository
|
||||
|
|
@@ -179,27 +179,55 @@ async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]:
|
|||
return []
|
||||
|
||||
try:
|
||||
# Use git check-ignore to filter out ignored files
|
||||
# --stdin allows us to check multiple files efficiently
|
||||
# -v flag would show matches, but we want non-matches
|
||||
# Exit code 0 = files are ignored, 1 = files are NOT ignored
|
||||
# CRITICAL: git check-ignore won't flag tracked files, even if now in .gitignore
|
||||
# Use `ls-files -i --exclude-standard` to find tracked-but-ignored files
|
||||
file_list = "\n".join(files)
|
||||
stdout, stderr, returncode = await _run_git_command(
|
||||
|
||||
# Check for tracked-but-ignored files (batch to avoid ARG_MAX)
|
||||
tracked_ignored = set()
|
||||
BATCH_SIZE = 1000
|
||||
if len(files) > BATCH_SIZE:
|
||||
for i in range(0, len(files), BATCH_SIZE):
|
||||
batch = files[i : i + BATCH_SIZE]
|
||||
stdout, _, _ = await _run_git_command(
|
||||
"ls-files", "-i", "--exclude-standard", "--",
|
||||
*batch,
|
||||
cwd=repo_path,
|
||||
check=False,
|
||||
)
|
||||
if stdout.strip():
|
||||
tracked_ignored.update(stdout.strip().split("\n"))
|
||||
else:
|
||||
tracked_ignored_stdout, _, _ = await _run_git_command(
|
||||
"ls-files", "-i", "--exclude-standard", "--",
|
||||
*files,
|
||||
cwd=repo_path,
|
||||
check=False,
|
||||
)
|
||||
if tracked_ignored_stdout.strip():
|
||||
tracked_ignored = set(tracked_ignored_stdout.strip().split("\n"))
|
||||
|
||||
# Also check for untracked/new ignored files (uses stdin, no ARG_MAX issue)
|
||||
untracked_ignored_stdout, _, _ = await _run_git_command(
|
||||
"check-ignore", "--stdin",
|
||||
cwd=repo_path,
|
||||
check=False,
|
||||
stdin=file_list.encode()
|
||||
)
|
||||
untracked_ignored = set(untracked_ignored_stdout.strip().split("\n")) if untracked_ignored_stdout.strip() else set()
|
||||
|
||||
# Files in stdout are ignored - we want to exclude these
|
||||
ignored_files = set(stdout.strip().split("\n")) if stdout.strip() else set()
|
||||
# Combine both sets of ignored files
|
||||
all_ignored = tracked_ignored | untracked_ignored
|
||||
|
||||
if all_ignored:
|
||||
logger.info(f"Filtered {len(all_ignored)} gitignored files ({len(tracked_ignored)} tracked-but-ignored, {len(untracked_ignored)} new)")
|
||||
|
||||
# Return only non-ignored files
|
||||
return [f for f in files if f not in ignored_files]
|
||||
return [f for f in files if f not in all_ignored]
|
||||
|
||||
except GitError:
|
||||
# If check-ignore fails, return all files (safer than blocking commits)
|
||||
logger.warning(f"git check-ignore failed in {repo_path}, proceeding without filter")
|
||||
except GitError as e:
|
||||
# If check fails, return all files (safer than blocking commits)
|
||||
logger.warning(f"git ignore check failed in {repo_path}: {e}, proceeding without filter")
|
||||
return files
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue