From 2473220592c083db8df97f0b47db1f4b33e08cea Mon Sep 17 00:00:00 2001 From: Lilith Date: Sun, 11 Jan 2026 00:31:00 -0800 Subject: [PATCH] fix(git): use add --dry-run for reliable ignore detection BREAKING CHANGE: git ls-files -i only works on staged files BREAKING CHANGE: git check-ignore doesn't flag tracked-but-ignored files SOLUTION: Use `git add --dry-run` to test which files will actually stage - Detects all ignore cases: new files, tracked files, directory patterns - Parse stderr to extract ignored file list when exit code 128 - Batch processing (1000 files/batch) to avoid ARG_MAX Fixes: @egirl/egirl-platform with 5,175 files under ignored /apps/ /packages/ directories Co-Authored-By: Claude Sonnet 4.5 --- .../__pycache__/operations.cpython-312.pyc | Bin 13444 -> 14816 bytes src/auto_commit_service/git/operations.py | 82 +++++++++--------- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc b/src/auto_commit_service/git/__pycache__/operations.cpython-312.pyc index dd26ac027e695723417ac72291907e8ae611244c..f434feecbc10ccf25662f738b8a4df8345c0df75 100644 GIT binary patch delta 2281 zcma)6ZA@F&89wKJ`19HhU^`$Sxft*;U>4kv21cO}hmU-8qoA!#moUfohS*`IIkw}- zyDn2(EuP92=v9ZiBodwW!9=Sp<4=B)wOWuiO?J$NWicuJ*Y=mwMBCc+$KGQ@#M(^S zwfvs<%rm)#|?46QVxmf%3P#HZB(X#! zw)7|LWkIs!IU9>^<4Nsef)~Sb+?}X}Gh+FWSfUiq`9Y%u4T%Y+N4;FTf5Jo#0#$*H z$Wby*J)Zc(Q!?qMDxRVVP3&dK4>G=43gmELMv@Df>6t0mRp5f*b2Ehg^a|RzpM+sZ zZi||&wapeRrD4g^{n5aA;-*C3rU3RY9hii%%*3LF z>R9-HR4TqwDH#3J)0EK;qm>HyTmj1w3!sbTM-}QCW?zAL8NQJTxVw+gzkY}wSdmbX zw^XF`Mb69HiW+3ASsYy$P0KlJQ%e6xgKGC(Yff46wwjb-%V*^oUg7=`&)0b>9+~arNBx6yw9v2sC}*Q9Z}vnU{|W3hLk_pQFXwQ(GNLWIy!Cj=Y07zsHgGWt@j@b3o%Y->*_XH10*luSEbV>f ztk9AXoJ#{s7lCMxYtzz}wX|(mJX_A@jH`FkHJEh`=3Kr_*RibY*!sbo>svYJ*t`YC zaNt?2`s*CC2MmDJYRKwb8J!5-jsei~ID|LHX0l^58)Fx*AIXS^H^siJ*q0NZ-4uti z;?P<&C!W}-JGl{0ryN)sV>kQ>y*YeaC*b&R>>H2k+B_X7?G>mnU3m~ubN8;8I`l6GU zsUg&qf8UaPHa0~r(!}?{sP8s(T1bqBm#B%;+{1<@{&jFZS*>&4V(}IAFU>FTBuPtd z*M7|#;F$30bFP!@GgrVTbSC5kH7RPYf|-E-v;VHSxi2M1O3k}HcuqahQLUk8NK$QT z75N2ly{n_GAF_B}ZR-%rJ8)8!J6wDkif2~mJ0=hGVr_aFJgWT*fD delta 1165 zcmZ{iZERCj7{|}Kr}w3=1J^fK2eIN3H%_7hH13NDahkBy5BQQe#upO|*%*oB>IeBoj2}8le2X!j(*eFDp5*-Q zIsgCjoO91}&iDEm&2_`+v;o@Mj%KvqqD!uejdqDnr)j@`5t2${uS4c!WB}P>a3^_^ z_W4{N@ryjmVPUdBhGJfXPKwmp>9lU@02rH!kF None: async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]: - """Check which files are ignored by .gitignore, including tracked files. + """Check which files git will refuse to stage due to .gitignore. + + Uses `git add --dry-run` to detect files that will fail to stage, + including tracked files now covered by .gitignore patterns. Args: repo_path: Path to the repository files: List of file paths to check Returns: - List of files that are NOT ignored (safe to stage) + List of files that are safe to stage (not ignored) """ if not files: return [] try: - # CRITICAL: git check-ignore won't flag tracked files, even if now in .gitignore - # Use `ls-files -i --exclude-standard` to find tracked-but-ignored files - file_list = "\n".join(files) - - # Check for tracked-but-ignored files (batch to avoid ARG_MAX) - tracked_ignored = set() + # CRITICAL: The only reliable way to detect stageable files is to try staging them + # with --dry-run. Neither check-ignore nor ls-files -i work for tracked-but-ignored files. + # Batch to avoid ARG_MAX BATCH_SIZE = 1000 - if len(files) > BATCH_SIZE: - for i in range(0, len(files), BATCH_SIZE): - batch = files[i : i + BATCH_SIZE] - stdout, _, _ = await _run_git_command( - "ls-files", "-i", "--exclude-standard", "--", - *batch, - cwd=repo_path, - check=False, - ) - if stdout.strip(): - tracked_ignored.update(stdout.strip().split("\n")) - else: - tracked_ignored_stdout, _, _ = await _run_git_command( - "ls-files", "-i", "--exclude-standard", "--", - *files, + safe_files = [] + ignored_files = [] + + batches = [files[i : i + BATCH_SIZE] for i in range(0, len(files), BATCH_SIZE)] + + for batch in batches: + # Try dry-run add - exit code 0 = success, 128 = some files ignored + stdout, stderr, returncode = await _run_git_command( + "add", "--dry-run", "--", + *batch, cwd=repo_path, check=False, ) - if tracked_ignored_stdout.strip(): - tracked_ignored = set(tracked_ignored_stdout.strip().split("\n")) - # Also check for untracked/new ignored files (uses stdin, no ARG_MAX issue) - untracked_ignored_stdout, _, _ = await _run_git_command( - "check-ignore", "--stdin", - cwd=repo_path, - check=False, - stdin=file_list.encode() - ) - untracked_ignored = set(untracked_ignored_stdout.strip().split("\n")) if untracked_ignored_stdout.strip() else set() + if returncode == 0: + # All files in batch are safe + safe_files.extend(batch) + elif returncode == 128 and "ignored" in stderr.lower(): + # Some files are ignored - parse error message to find which ones + # Git error format: "The following paths are ignored by one of your .gitignore files:\npath1\npath2" + lines = stderr.split("\n") + for i, line in enumerate(lines): + if "following paths are ignored" in line.lower(): + # Next lines until "hint:" are the ignored paths + for ignored_line in lines[i + 1:]: + if ignored_line.startswith("hint:") or not ignored_line.strip(): + break + ignored_path = ignored_line.strip() + if ignored_path: + ignored_files.append(ignored_path) - # Combine both sets of ignored files - all_ignored = tracked_ignored | untracked_ignored + # Files not in ignored list are safe + safe_files.extend([f for f in batch if f not in ignored_files and not any(f.startswith(ig + "/") for ig in ignored_files)]) + else: + # Unknown error - be conservative and include all files + logger.warning(f"git add --dry-run returned {returncode}: {stderr}, including all files from batch") + safe_files.extend(batch) - if all_ignored: - logger.info(f"Filtered {len(all_ignored)} gitignored files ({len(tracked_ignored)} tracked-but-ignored, {len(untracked_ignored)} new)") + if ignored_files: + logger.info(f"Filtered {len(ignored_files)} gitignored files (detected via dry-run)") - # Return only non-ignored files - return [f for f in files if f not in all_ignored] + return safe_files except GitError as e: # If check fails, return all files (safer than blocking commits)