"""Async git command wrappers using subprocess.""" import asyncio import logging import os from pathlib import Path from .repository import GitStatus, CommitResult, PushResult logger = logging.getLogger(__name__) def unquote_git_path(path: str) -> str: """Strip git's porcelain quoting from paths with spaces/special chars.""" if path.startswith('"') and path.endswith('"'): return path[1:-1] return path class GitError(Exception): """Base exception for git operations.""" def __init__(self, message: str, stderr: str = "", returncode: int = 1): super().__init__(message) self.stderr = stderr self.returncode = returncode class MergeConflictError(GitError): """Raised when there's a merge conflict.""" pass class PushRejectedError(GitError): """Raised when push is rejected by remote.""" pass async def _run_git_command( *args: str, cwd: Path, check: bool = True, stdin: bytes | None = None, env: dict[str, str] | None = None, ) -> tuple[str, str, int]: """Run a git command asynchronously. Uses asyncio subprocess with argument list (safe, no shell injection). This is equivalent to Node.js execFile - arguments are passed directly to the process without shell interpretation. Args: *args: Git command arguments cwd: Working directory for the command check: Raise GitError on non-zero exit code stdin: Optional stdin data to send to the process env: Optional env overrides merged onto os.environ """ merged_env = {**os.environ, **(env or {})} # asyncio.create_subprocess_exec is safe - no shell, args passed directly create_process = asyncio.create_subprocess_exec proc = await create_process( "git", *args, cwd=str(cwd), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, stdin=asyncio.subprocess.PIPE if stdin is not None else None, env=merged_env, ) stdout, stderr = await proc.communicate(input=stdin) stdout_str = stdout.decode().strip() stderr_str = stderr.decode().strip() returncode = proc.returncode or 0 if check and returncode != 0: raise GitError( f"git {' '.join(args)} failed: {stderr_str}", stderr=stderr_str, returncode=returncode, ) return stdout_str, stderr_str, returncode async def git_status(repo_path: Path) -> GitStatus: """Get the git status of a repository.""" # Get porcelain status for parsing stdout, _, _ = await _run_git_command("status", "--porcelain", "-b", cwd=repo_path) lines = stdout.split("\n") if stdout else [] staged = [] modified = [] untracked = [] deleted = [] branch = "main" ahead = 0 behind = 0 for line in lines: if not line: continue if line.startswith("##"): # Parse branch info: ## main...origin/main [ahead 1, behind 2] branch_info = line[3:] if "..." in branch_info: branch = branch_info.split("...")[0] else: branch = branch_info.split()[0] if branch_info else "main" if "[ahead " in line: try: ahead_part = line.split("[ahead ")[1].split("]")[0].split(",")[0] ahead = int(ahead_part) except (IndexError, ValueError): pass if "behind " in line: try: behind_part = line.split("behind ")[1].split("]")[0] behind = int(behind_part) except (IndexError, ValueError): pass continue # Parse file status if len(line) >= 3: index_status = line[0] worktree_status = line[1] filepath = unquote_git_path(line[3:]) if index_status == "?" and worktree_status == "?": untracked.append(filepath) elif index_status == "D" or worktree_status == "D": deleted.append(filepath) elif index_status in "MARC": staged.append(filepath) elif worktree_status == "M": modified.append(filepath) has_changes = bool(staged or modified or untracked or deleted) return GitStatus( has_changes=has_changes, staged=staged, modified=modified, untracked=untracked, deleted=deleted, branch=branch, ahead=ahead, behind=behind, ) async def git_diff(repo_path: Path, staged: bool = False) -> str: """Get the diff of changes. Args: repo_path: Path to the repository staged: If True, show only staged changes (--cached) """ args = ["diff"] if staged: args.append("--cached") else: args.append("HEAD") stdout, _, _ = await _run_git_command(*args, cwd=repo_path, check=False) return stdout async def git_add_all(repo_path: Path) -> None: """Stage all changes including untracked files.""" await _run_git_command("add", "-A", cwd=repo_path) async def git_check_ignored(repo_path: Path, files: list[str]) -> list[str]: """Check which files git will refuse to stage due to .gitignore. Uses `git add --dry-run` to detect files that will fail to stage, including tracked files now covered by .gitignore patterns. Args: repo_path: Path to the repository files: List of file paths to check Returns: List of files that are safe to stage (not ignored) """ if not files: return [] try: # CRITICAL: The only reliable way to detect stageable files is to try staging them # with --dry-run. Neither check-ignore nor ls-files -i work for tracked-but-ignored files. # Batch to avoid ARG_MAX BATCH_SIZE = 1000 safe_files = [] ignored_files = [] batches = [files[i : i + BATCH_SIZE] for i in range(0, len(files), BATCH_SIZE)] for batch in batches: # Try dry-run add - exit code 0 = success, 128 = some files ignored stdout, stderr, returncode = await _run_git_command( "add", "--dry-run", "--", *batch, cwd=repo_path, check=False, ) if returncode == 0: # All files in batch are safe safe_files.extend(batch) elif (returncode == 1 or returncode == 128) and "ignored" in stderr.lower(): # Some files are ignored - parse error message to find which ones # Git error format: "The following paths are ignored by one of your .gitignore files:\npath1\npath2" # Exit code 1 = some files ignored, 128 = all files ignored batch_ignored = [] lines = stderr.split("\n") for i, line in enumerate(lines): if "following paths are ignored" in line.lower(): # Next lines until "hint:" are the ignored paths for ignored_line in lines[i + 1:]: if ignored_line.startswith("hint:") or not ignored_line.strip(): break ignored_path = ignored_line.strip() if ignored_path: batch_ignored.append(ignored_path) ignored_files.append(ignored_path) # Files not in ignored list are safe safe_files.extend([f for f in batch if f not in batch_ignored and not any(f.startswith(ig + "/") for ig in batch_ignored)]) else: # Unknown error - be conservative and include all files logger.warning(f"git add --dry-run returned {returncode}: {stderr}, including all files from batch") safe_files.extend(batch) if ignored_files: logger.info(f"Filtered {len(ignored_files)} gitignored files (detected via dry-run)") return safe_files except GitError as e: # If check fails, return all files (safer than blocking commits) logger.warning(f"git ignore check failed in {repo_path}: {e}, proceeding without filter") return files async def git_add_specific(repo_path: Path, files: list[str]) -> None: """Stage specific files only, filtering out gitignored files. Args: repo_path: Path to the repository files: List of file paths to stage """ if not files: return # Filter out gitignored files before staging stageable_files = await git_check_ignored(repo_path, files) if not stageable_files: logger.warning(f"All {len(files)} files are gitignored, nothing to stage") return if len(stageable_files) < len(files): ignored_count = len(files) - len(stageable_files) logger.debug(f"Filtered out {ignored_count} gitignored files, staging {len(stageable_files)}") # Batch git add if too many files to avoid ARG_MAX limit # Most systems have ARG_MAX ~2MB, conservatively batch at 1000 files BATCH_SIZE = 1000 if len(stageable_files) > BATCH_SIZE: logger.info(f"Batching git add for {len(stageable_files)} files ({BATCH_SIZE} per batch)") for i in range(0, len(stageable_files), BATCH_SIZE): batch = stageable_files[i : i + BATCH_SIZE] logger.debug(f"Adding batch {i // BATCH_SIZE + 1}/{(len(stageable_files) + BATCH_SIZE - 1) // BATCH_SIZE}") await _run_git_command("add", "--", *batch, cwd=repo_path) else: # Add files in a single command for efficiency await _run_git_command("add", "--", *stageable_files, cwd=repo_path) async def git_commit(repo_path: Path, message: str) -> CommitResult: """Create a commit with the given message.""" try: stdout, stderr, returncode = await _run_git_command( "commit", "-m", message, cwd=repo_path, check=False ) if returncode != 0: # Check for "nothing to commit" if "nothing to commit" in stdout or "nothing to commit" in stderr: return CommitResult( success=False, error="Nothing to commit", ) return CommitResult( success=False, error=stderr or stdout, ) # Get commit hash hash_stdout, _, _ = await _run_git_command( "rev-parse", "--short", "HEAD", cwd=repo_path ) return CommitResult( success=True, commit_hash=hash_stdout, message=message, ) except GitError as e: return CommitResult( success=False, error=str(e), ) async def git_push( repo_path: Path, remote: str = "origin", branch: str = "main", ) -> PushResult: """Push commits to remote. Uses --no-verify to bypass pre-push hooks (e.g. adversarial-view generators that require GPU services). Uses -u to set upstream tracking so that subsequent `git status -b` reports ahead/behind correctly. """ try: _, stderr, returncode = await _run_git_command( "push", "--no-verify", "-u", remote, branch, cwd=repo_path, check=False ) if returncode != 0: rejected = "rejected" in stderr.lower() or "non-fast-forward" in stderr.lower() if rejected: raise PushRejectedError( f"Push rejected: {stderr}", stderr=stderr, returncode=returncode, ) return PushResult( success=False, remote=remote, branch=branch, error=stderr, rejected=rejected, ) return PushResult( success=True, remote=remote, branch=branch, ) except PushRejectedError: raise except GitError as e: return PushResult( success=False, remote=remote, branch=branch, error=str(e), ) async def _abort_rebase_verified(repo_path: Path) -> bool: """Run rebase --abort and verify the rebase state is cleared. Escalates to --quit + reset --hard ORIG_HEAD if abort doesn't clear state. Returns True if repo is clean after recovery, False if unrecoverable. """ rebase_merge = repo_path / ".git" / "rebase-merge" rebase_apply = repo_path / ".git" / "rebase-apply" await _run_git_command("rebase", "--abort", cwd=repo_path, check=False) if not rebase_merge.exists() and not rebase_apply.exists(): return True logger.warning(f"rebase --abort didn't clear state in {repo_path}, escalating to --quit") await _run_git_command("rebase", "--quit", cwd=repo_path, check=False) if not rebase_merge.exists() and not rebase_apply.exists(): return True logger.error(f"rebase --quit also failed in {repo_path}, resetting to ORIG_HEAD") await _run_git_command("reset", "--hard", "ORIG_HEAD", cwd=repo_path, check=False) return not rebase_merge.exists() and not rebase_apply.exists() async def pre_cycle_recover(repo_path: Path) -> dict: """Detect and recover from orphan rebase/merge state left by a prior cycle. Returns dict: {was_stuck: bool, recovered: bool, state: str} """ rebase_merge = repo_path / ".git" / "rebase-merge" rebase_apply = repo_path / ".git" / "rebase-apply" merge_head = repo_path / ".git" / "MERGE_HEAD" if rebase_merge.exists() or rebase_apply.exists(): logger.warning(f"Orphan rebase detected in {repo_path}, attempting recovery") recovered = await _abort_rebase_verified(repo_path) return {"was_stuck": True, "recovered": recovered, "state": "rebase"} if merge_head.exists(): logger.warning(f"Orphan merge detected in {repo_path}, aborting") await _run_git_command("merge", "--abort", cwd=repo_path, check=False) return {"was_stuck": True, "recovered": not merge_head.exists(), "state": "merge"} return {"was_stuck": False, "recovered": False, "state": "clean"} async def git_pull_rebase( repo_path: Path, remote: str = "origin", branch: str = "main", ) -> bool: """Pull with rebase to resolve diverged history. Returns True if successful, raises MergeConflictError on conflicts. """ try: _, stderr, returncode = await _run_git_command( "pull", "--rebase", remote, branch, cwd=repo_path, check=False ) if returncode != 0: if "conflict" in stderr.lower() or "CONFLICT" in stderr: from .conflict_resolution import try_auto_resolve_lockfiles resolution = await try_auto_resolve_lockfiles(repo_path) if resolution["resolved"]: _, cont_stderr, cont_rc = await _run_git_command( "rebase", "--continue", cwd=repo_path, check=False, env={"GIT_EDITOR": "true"}, ) if cont_rc == 0: logger.info( f"Auto-resolved lockfile conflicts in {repo_path}: " f"{resolution['files_resolved']}" ) return True logger.warning( f"rebase --continue failed after auto-resolution: {cont_stderr}" ) if not await _abort_rebase_verified(repo_path): raise GitError( f"Rebase conflict and abort failed — repo stuck in {repo_path}", stderr=stderr, returncode=returncode, ) raise MergeConflictError( f"Merge conflict during rebase: {stderr}", stderr=stderr, returncode=returncode, ) raise GitError( f"Pull rebase failed: {stderr}", stderr=stderr, returncode=returncode, ) return True except (MergeConflictError, GitError): raise except Exception as e: raise GitError(f"Unexpected error during pull rebase: {e}") async def git_fetch( repo_path: Path, remote: str = "origin", ) -> bool: """Fetch from remote to update refs without touching the working tree. Returns True on success, False on failure (non-fatal). """ try: _, stderr, returncode = await _run_git_command( "fetch", remote, cwd=repo_path, check=False ) if returncode != 0: logger.warning(f"git fetch failed in {repo_path}: {stderr}") return False return True except Exception as e: logger.warning(f"git fetch error in {repo_path}: {e}") return False async def pre_cycle_sync( repo_path: Path, remote: str = "origin", branch: str = "main", ) -> dict: """Sync repo with remote before a commit cycle. Sequence: recover orphan state -> fetch -> check if behind -> skip if dirty -> pull --rebase. Never stashes — dirty trees are left alone and committed normally. All failures are non-fatal (logged and returned in result dict). Returns dict with keys: fetched, pulled, behind_count, skipped_dirty, recovered, error """ result = { "fetched": False, "pulled": False, "behind_count": 0, "skipped_dirty": False, "recovered": False, "error": None, } recovery = await pre_cycle_recover(repo_path) if recovery["was_stuck"]: result["recovered"] = recovery["recovered"] if not recovery["recovered"]: result["error"] = f"orphan_{recovery['state']}_unrecovered" return result # Step 1: Fetch if not await git_fetch(repo_path, remote): result["error"] = "fetch_failed" return result result["fetched"] = True # Step 2: Check if behind (re-read status after fetch updates refs) try: status = await git_status(repo_path) except Exception as e: result["error"] = f"status_failed: {e}" return result result["behind_count"] = status.behind if status.behind == 0: return result # Nothing to pull # Step 3: Skip pull if working tree is dirty — multiple agents may be # working concurrently, so stashing is unsafe. The dirty changes will # get committed this cycle, pushed, and the next cycle will pull clean. if status.has_changes: result["skipped_dirty"] = True logger.debug( f"Skipping pull for {repo_path}: dirty working tree " f"({status.behind} commits behind, will sync after commit)" ) return result # Step 4: Pull --rebase (clean tree only) try: await git_pull_rebase(repo_path, remote, branch) result["pulled"] = True except MergeConflictError as e: logger.warning(f"Conflict during pre-cycle pull in {repo_path}: {e}") result["error"] = "merge_conflict" except GitError as e: logger.warning(f"Pull failed in {repo_path}: {e}") result["error"] = f"pull_failed: {e}" return result async def git_log_recent( repo_path: Path, count: int = 5, ) -> list[str]: """Get recent commit messages for style reference.""" stdout, _, _ = await _run_git_command( "log", f"-{count}", "--format=%s", cwd=repo_path, check=False ) return [line for line in stdout.split("\n") if line]