From f302e8366a06e5874252eb98c6cf957e98fe17cf Mon Sep 17 00:00:00 2001 From: Lilith Date: Sun, 11 Jan 2026 00:08:05 -0800 Subject: [PATCH] fix(git): detect tracked-but-ignored files with ls-files -i - Use `git ls-files -i --exclude-standard` to find tracked files now in .gitignore - Combine with `git check-ignore` for comprehensive ignore detection - Batch ls-files calls (1000 files/batch) to avoid ARG_MAX - Prevents "paths are ignored" errors for legacy tracked files (apps/, packages/, test-output/) PROBLEM: git check-ignore alone doesn't flag already-tracked files SOLUTION: Two-phase detection (tracked-ignored + new-ignored) Fixes egirl-platform failures: 5,175 tracked-but-ignored files filtered Co-Authored-By: Claude Sonnet 4.5 --- .../__pycache__/operations.cpython-312.pyc | Bin 12787 -> 13444 bytes src/auto_commit_service/git/operations.py | 52 ++++++++++++++---- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc b/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc index 64ab13657b091347fccaa9aaff84330cd3ab162f..dd26ac027e695723417ac72291907e8ae611244c 100644 GIT binary patch delta 1148 zcmaJ>ZD>PP*Ve%C?mz{D0bI+==G! 
zY*#0m^}?{-HC@6!H!IZtnbv-W2C1r^XmaqJHWDXsElHxPN`^)$Ny%hLJ0DAs=9Hbh zAERV|t9+DSNy-^iRi3cvBUA)m0k_j39urzy=^h|HnO=a2p6Bo$hm!yWNsSGR#S5Y~ zGUOU9NHjVUkI^oO_$H;OA%wk7YVA7k$1oD- zFzY8%>5_~*)e}dH7Tff}b<2wj!)uo2qO4rC&)AJ>|CfiBTTI1AN zqcZwL9w@r2=PrEiT^L=jX-nZXq?p{8zYFatI9_TYpNArPn)KFvjK+G6U<_{)~17pUni%%Svik_D;uVotz zPk5LEJ_?ZQH_W$BQu7-uNjA|&6Z2y`PS@GqS$Wu9zmm?)V+7ZCd zOO43+CjPBtM%H8a_e2LA`NN7(75)r|6mh^fZJAk8|~}h}cW;F`Q9Lh|fzM%cgr>Wxi6m NvFR&?G3(v;^FO4#0vVcYMTJ1BCx2HK{bc$P&obWSP8KQH!;h1<0QKTTzC)49GYK#KmhFCMzkm27F;-;Fg|Y zxXdW=&UowQh{ zIq?CRhg2AybXX3lvVqt-j81x+k0^68F;-4Kr&`5Y2h{L;y;^MQL*BP-eDv2?AGA>B@zyP8*nCxKQXbE8-5cv!i`(QNrs_}XN D+;M!3 diff --git a/src/auto_commit_service/git/operations.py b/src/auto_commit_service/git/operations.py index a94c09e..e5a0df4 100644 --- a/src/auto_commit_service/git/operations.py +++ b/src/auto_commit_service/git/operations.py @@ -166,7 +166,7 @@ async def git_add_all(repo_path: Path) -> None: async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]: - """Check which files are ignored by .gitignore. + """Check which files are ignored by .gitignore, including tracked files. 
Args: repo_path: Path to the repository @@ -179,27 +179,55 @@ async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]: return [] try: - # Use git check-ignore to filter out ignored files - # --stdin allows us to check multiple files efficiently - # -v flag would show matches, but we want non-matches - # Exit code 0 = files are ignored, 1 = files are NOT ignored + # CRITICAL: git check-ignore won't flag tracked files, even if now in .gitignore + # Use `ls-files -i --exclude-standard` to find tracked-but-ignored files file_list = "\n".join(files) - stdout, stderr, returncode = await _run_git_command( + + # Check for tracked-but-ignored files (batch to avoid ARG_MAX) + tracked_ignored = set() + BATCH_SIZE = 1000 + if len(files) > BATCH_SIZE: + for i in range(0, len(files), BATCH_SIZE): + batch = files[i : i + BATCH_SIZE] + stdout, _, _ = await _run_git_command( + "ls-files", "-i", "--exclude-standard", "--", + *batch, + cwd=repo_path, + check=False, + ) + if stdout.strip(): + tracked_ignored.update(stdout.strip().split("\n")) + else: + tracked_ignored_stdout, _, _ = await _run_git_command( + "ls-files", "-i", "--exclude-standard", "--", + *files, + cwd=repo_path, + check=False, + ) + if tracked_ignored_stdout.strip(): + tracked_ignored = set(tracked_ignored_stdout.strip().split("\n")) + + # Also check for untracked/new ignored files (uses stdin, no ARG_MAX issue) + untracked_ignored_stdout, _, _ = await _run_git_command( "check-ignore", "--stdin", cwd=repo_path, check=False, stdin=file_list.encode() ) + untracked_ignored = set(untracked_ignored_stdout.strip().split("\n")) if untracked_ignored_stdout.strip() else set() - # Files in stdout are ignored - we want to exclude these - ignored_files = set(stdout.strip().split("\n")) if stdout.strip() else set() + # Combine both sets of ignored files + all_ignored = tracked_ignored | untracked_ignored + + if all_ignored: + logger.info(f"Filtered {len(all_ignored)} gitignored files ({len(tracked_ignored)} 
tracked-but-ignored, {len(untracked_ignored)} new)") # Return only non-ignored files - return [f for f in files if f not in ignored_files] + return [f for f in files if f not in all_ignored] - except GitError: - # If check-ignore fails, return all files (safer than blocking commits) - logger.warning(f"git check-ignore failed in {repo_path}, proceeding without filter") + except GitError as e: + # If check fails, return all files (safer than blocking commits) + logger.warning(f"git ignore check failed in {repo_path}: {e}, proceeding without filter") return files