feat(auto-commit): add Claude recovery cooldown and stalled repo tracking

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-04-14 17:54:42 -07:00
parent b477082c30
commit 90f06a77aa
2 changed files with 155 additions and 8 deletions

View file

@ -29,6 +29,28 @@ logger = logging.getLogger(__name__)
# Short host label: the first dot-separated component of this machine's hostname.
HOSTNAME = socket.gethostname().split(".")[0] # e.g., "plum" from "plum.voyager.nasty.sh"
# Minimum gap, in seconds (3600 s = 1 hour), between Claude recovery attempts
# for the same repo. Don't re-invoke Claude for the same repo more often than
# this — diverged branches that Claude can't fix stay stuck; spamming every
# 5 min wastes tokens.
CLAUDE_RECOVERY_COOLDOWN_SEC = 3600
# Prompt template for a recovery run. It contains the placeholders {repo_name},
# {repo_path}, {ahead}, {behind} and {upstream}; presumably they are filled in
# with str.format() by the code that invokes Claude — verify against the caller.
# NOTE(review): the host name "plum" is hard-coded in the first line of the
# prompt even though HOSTNAME is computed above — confirm this is intended.
CLAUDE_RECOVERY_PROMPT = """You are recovering a diverged git branch in the auto-commit service on plum.
Repo: {repo_name}
Path: {repo_path}
Local branch is {ahead} ahead and {behind} behind {upstream}.
Push was rejected as non-fast-forward.
Please:
1. Fetch the latest upstream.
2. Attempt `git rebase {upstream}`. If conflicts are mechanical (lockfiles,
generated files, imports), resolve them preserving both branches' intent.
3. If conflicts are semantic/high-risk (business logic in overlapping regions),
abort the rebase and exit with a message describing what needs human review.
4. After a clean rebase, run `git push`. Do not force-push.
Never delete work. Never use --force. If unsure, abort and exit cleanly.
"""
@dataclass
class CycleResult:
@ -37,6 +59,7 @@ class CycleResult:
repos_failed: int = 0
commits: list[dict] = field(default_factory=list)
errors: list[str] = field(default_factory=list)
stalled_repos: list[dict] = field(default_factory=list)
timestamp: str = ""
@ -63,6 +86,7 @@ class LocalCommitAgent:
self._repos: list[Path] = []
self._last_cycle: CycleResult | None = None
self._total_cycles = 0
self._last_recovery_at: dict[str, float] = {} # repo_name -> monotonic ts
@property
def is_running(self) -> bool:
@ -135,9 +159,18 @@ class LocalCommitAgent:
def _process_repo(self, repo_path: Path, result: CycleResult) -> bool:
"""Process a single repo. Returns True if a commit was made."""
# Best-effort fetch so divergence is visible before we pile on more commits.
# Offline is fine — swallow failures.
try:
_git(repo_path, "fetch", "--quiet", timeout=20)
except Exception as e:
logger.debug(f"Fetch skipped for {repo_path.name}: {e}")
# Check for changes
status = _git(repo_path, "status", "--porcelain")
if not status.strip():
# No local changes — but we might still be behind origin from the fetch above.
self._push_if_safe(repo_path, result)
return False
# Stage all changes
@ -179,12 +212,8 @@ class LocalCommitAgent:
# Get commit hash
commit_hash = _git(repo_path, "rev-parse", "HEAD").strip()
# Push
try:
_git(repo_path, "push")
except Exception as e:
logger.warning(f"Push failed for {repo_name}: {e}")
# Still record the commit even if push fails
# Push (with divergence handling)
self._push_if_safe(repo_path, result, repo_name=repo_name)
# Parse stats from diff
stat_line = diff.split("\n")[0] if diff else ""
@ -231,6 +260,107 @@ class LocalCommitAgent:
return True
def _push_if_safe(
    self,
    repo_path: Path,
    result: CycleResult,
    repo_name: str | None = None,
) -> None:
    """Bring the repo in sync with its upstream without ever forcing.

    Called after a commit (``repo_name`` provided) and also when a repo has
    nothing to commit, so that a repo which is merely behind or ahead of its
    upstream still gets synced.

    Behaviour by state (counts come from ``_ahead_behind``):

    * no upstream            -> nothing to do
    * in sync                -> nothing to do
    * behind only            -> ``git merge --ff-only <upstream>``
    * ahead only             -> ``git push``; on failure, refresh the
      remote-tracking ref and hand over to ``_handle_divergence`` if the
      upstream turned out to have moved
    * both ahead and behind  -> ``_handle_divergence``

    Args:
        repo_path: Working tree of the repository.
        result: Cycle accumulator; stalled repos are recorded on it by
            ``_handle_divergence``.
        repo_name: Display name used in logs. Derived from ``repo_path``
            via ``_repo_display_name`` when omitted.
    """
    if repo_name is None:
        repo_name = _repo_display_name(repo_path)

    upstream = _upstream_ref(repo_path)
    if not upstream:
        return  # Detached / no tracking branch — nothing to sync

    ahead, behind = _ahead_behind(repo_path, upstream)
    if ahead == 0 and behind == 0:
        return  # In sync

    if ahead == 0 and behind > 0:
        # Upstream moved, we have no local commits to push. Fast-forward.
        try:
            _git(repo_path, "merge", "--ff-only", upstream)
        except Exception as e:
            logger.warning(f"Fast-forward failed for {repo_name}: {e}")
        return

    if ahead > 0 and behind == 0:
        # Clean ahead — normal push path.
        try:
            _git(repo_path, "push")
        except Exception as e:
            logger.warning(f"Push failed for {repo_name}: {e}")
        else:
            return

        # The push may have lost a race with another pusher. A rejected
        # push does not update the remote-tracking ref, so refresh it
        # first; otherwise the re-check below would compare against the
        # same stale ref and could never observe the new divergence.
        try:
            _git(repo_path, "fetch", "--quiet", timeout=20)
        except Exception as e:
            # Best-effort (e.g. offline): fall through with what we have.
            logger.debug(f"Post-rejection fetch failed for {repo_name}: {e}")

        ahead2, behind2 = _ahead_behind(repo_path, upstream)
        if behind2 > 0:
            self._handle_divergence(
                repo_path, repo_name, upstream, ahead2, behind2, result,
            )
        return

    # Diverged: both ahead and behind.
    self._handle_divergence(repo_path, repo_name, upstream, ahead, behind, result)
def _handle_divergence(
    self,
    repo_path: Path,
    repo_name: str,
    upstream: str,
    ahead: int,
    behind: int,
    result: CycleResult,
) -> None:
    """Invoke Claude Code (rate-limited) to reconcile divergence.

    At most one recovery attempt per repo is made every
    ``CLAUDE_RECOVERY_COOLDOWN_SEC`` seconds. Whenever the repo is still
    not reconciled when this method returns (cooldown active, recovery
    reported failure, or recovery left the branch behind/unpushed), an
    entry is appended to ``result.stalled_repos``.

    Args:
        repo_path: Working tree of the repository.
        repo_name: Display name; also the key of the per-repo cooldown.
        upstream: Upstream ref the branch is compared against.
        ahead: Local commits not on ``upstream``.
        behind: ``upstream`` commits not on the local branch.
        result: Cycle accumulator that receives the stall entry.
    """
    now = time.monotonic()
    # ``None`` (not 0.0) marks "never attempted". time.monotonic() has an
    # undefined reference point and can be smaller than the cooldown (for
    # example shortly after boot), so a 0.0 default would make a repo that
    # was never attempted look like it is still cooling down.
    last = self._last_recovery_at.get(repo_name)
    elapsed = None if last is None else now - last
    cooled_down = elapsed is None or elapsed >= CLAUDE_RECOVERY_COOLDOWN_SEC

    stall_entry = {
        "repo_name": repo_name,
        "reason": "diverged",
        "ahead": ahead,
        "behind": behind,
        # Wall-clock time of the attempt preceding this call, obtained by
        # projecting the monotonic age onto the current wall clock.
        "last_attempt": (
            datetime.fromtimestamp(time.time() - elapsed, timezone.utc).isoformat()
            if elapsed is not None
            else None
        ),
    }

    if not cooled_down:
        logger.info(
            f"Stalled {repo_name}: {ahead}↑ {behind}↓ (in cooldown, "
            f"next Claude attempt in {int(CLAUDE_RECOVERY_COOLDOWN_SEC - elapsed)}s)"
        )
        result.stalled_repos.append(stall_entry)
        return

    # Stamp the attempt before invoking, so a failed run still counts
    # against the cooldown.
    self._last_recovery_at[repo_name] = now
    logger.warning(
        f"Diverged {repo_name}: {ahead}↑ {behind}↓ — invoking claude-code for recovery"
    )

    if _invoke_claude_recovery(repo_path, repo_name, upstream, ahead, behind):
        ahead2, behind2 = _ahead_behind(repo_path, upstream)
        if ahead2 == 0 and behind2 == 0:
            logger.info(f"Claude resolved {repo_name} cleanly")
            return
        if behind2 == 0 and ahead2 > 0:
            # Rebased but not yet pushed: finish the job with a plain push.
            try:
                _git(repo_path, "push")
                logger.info(f"Claude rebased {repo_name}; push succeeded")
                return
            except Exception as e:
                logger.error(f"Claude rebased {repo_name} but push failed: {e}")
        # Recovery ran but the branch is still not in sync with upstream.
        stall_entry.update(ahead=ahead2, behind=behind2, reason="claude_partial")

    result.stalled_repos.append(stall_entry)
def close(self) -> None:
    """Stop the agent and close its client.

    The client is closed even when ``stop()`` raises, so a failing
    shutdown does not leak the client; the exception from ``stop()``
    still propagates to the caller.
    """
    try:
        self.stop()
    finally:
        self._client.close()

View file

@ -24,10 +24,25 @@ class LocalCommit:
# Directory names that are skipped during discovery.
SKIP_DIRS = frozenset((
    "node_modules",
    ".venv",
    "venv",
    "dist",
    "build",
    "__pycache__",
    ".git",
    ".cache",
    ".Trash",
    "last-linux-backup",
))
def _is_usable_repo(path: Path) -> bool:
"""True when `path` is a working git repo we can run commands against.
Some backup copies contain a partial `.git/` (HEAD + config) but are missing
`objects/` or `refs/`, so `git status` fails. Validate the artifacts that
make a repo functional before treating it as discoverable.
"""
dot_git = path / ".git"
if not dot_git.exists():
return False
if dot_git.is_dir():
return (dot_git / "HEAD").is_file() and (dot_git / "objects").is_dir()
return dot_git.is_file() # gitfile (worktree/submodule)
def discover_repos(base_paths: list[Path], max_depth: int = 4) -> list[Path]:
"""Find git repositories under the given base paths."""
repos: list[Path] = []
@ -44,9 +59,11 @@ def _walk_for_repos(path: Path, repos: list[Path], depth: int, max_depth: int) -
if depth > max_depth:
return
if (path / ".git").exists():
if _is_usable_repo(path):
repos.append(path)
return # Don't recurse into sub-repos
if (path / ".git").exists():
return # Broken/partial repo — stop here, don't treat inner dirs as repos
try:
entries = sorted(path.iterdir())