feat(@ml/auto-commit-service): ✨ add cooldown logic for Claude recovery attempts
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
c508867e0e
commit
3b7be20bce
1 changed file with 23 additions and 8 deletions
|
|
@@ -29,9 +29,12 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
HOSTNAME = socket.gethostname().split(".")[0] # e.g., "plum" from "plum.voyager.nasty.sh"
|
||||
|
||||
# Don't re-invoke Claude for the same repo more often than this — diverged
|
||||
# branches that Claude can't fix stay stuck; spamming every 5 min wastes tokens.
|
||||
# Cooldown between Claude recovery attempts on the same repo.
|
||||
# Short cooldown: first try after any idle period (covers transient divergence).
|
||||
# Long cooldown: once Claude has already looked at a repo and couldn't or
|
||||
# wouldn't resolve it, don't burn more tokens for 24h — human attention needed.
|
||||
CLAUDE_RECOVERY_COOLDOWN_SEC = 3600
|
||||
CLAUDE_STUCK_COOLDOWN_SEC = 86400
|
||||
|
||||
CLAUDE_RECOVERY_PROMPT = """You are recovering a diverged git branch in the auto-commit service on plum.
|
||||
|
||||
|
|
@@ -87,6 +90,9 @@ class LocalCommitAgent:
|
|||
self._last_cycle: CycleResult | None = None
|
||||
self._total_cycles = 0
|
||||
self._last_recovery_at: dict[str, float] = {} # repo_name -> monotonic ts
|
||||
# repo_name -> required cooldown for *next* attempt. Starts at 1h, bumps
|
||||
# to 24h after Claude couldn't/wouldn't resolve. Resets on clean success.
|
||||
self._recovery_cooldown: dict[str, float] = {}
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
|
|
@@ -183,9 +189,13 @@ class LocalCommitAgent:
|
|||
if not diff.strip():
|
||||
return False
|
||||
|
||||
# Get repo name and branch
|
||||
# Get repo name and branch (symbolic-ref works even for unborn branches,
|
||||
# unlike `rev-parse HEAD` which fails before the initial commit).
|
||||
repo_name = _repo_display_name(repo_path)
|
||||
branch = _git(repo_path, "rev-parse", "--abbrev-ref", "HEAD").strip() or "main"
|
||||
try:
|
||||
branch = _git(repo_path, "symbolic-ref", "--short", "HEAD").strip()
|
||||
except Exception:
|
||||
branch = "main"
|
||||
|
||||
# Ask ACS for commit message
|
||||
try:
|
||||
|
|
@@ -319,7 +329,8 @@ class LocalCommitAgent:
|
|||
"""Invoke Claude Code (rate-limited) to reconcile divergence."""
|
||||
now = time.monotonic()
|
||||
last = self._last_recovery_at.get(repo_name, 0.0)
|
||||
cooled_down = (now - last) >= CLAUDE_RECOVERY_COOLDOWN_SEC
|
||||
required = self._recovery_cooldown.get(repo_name, CLAUDE_RECOVERY_COOLDOWN_SEC)
|
||||
cooled_down = (now - last) >= required
|
||||
|
||||
stall_entry = {
|
||||
"repo_name": repo_name,
|
||||
|
|
@@ -335,7 +346,7 @@ class LocalCommitAgent:
|
|||
if not cooled_down:
|
||||
logger.info(
|
||||
f"Stalled {repo_name}: {ahead}↑ {behind}↓ (in cooldown, "
|
||||
f"next Claude attempt in {int(CLAUDE_RECOVERY_COOLDOWN_SEC - (now - last))}s)"
|
||||
f"next Claude attempt in {int(required - (now - last))}s)"
|
||||
)
|
||||
result.stalled_repos.append(stall_entry)
|
||||
return
|
||||
|
|
@@ -350,22 +361,26 @@ class LocalCommitAgent:
|
|||
ahead2, behind2 = _ahead_behind(repo_path, upstream)
|
||||
if ahead2 == 0 and behind2 == 0:
|
||||
logger.info(f"Claude resolved {repo_name} cleanly")
|
||||
self._recovery_cooldown.pop(repo_name, None)
|
||||
return
|
||||
if behind2 == 0 and ahead2 > 0:
|
||||
try:
|
||||
_git(repo_path, "push")
|
||||
logger.info(f"Claude rebased {repo_name}; push succeeded")
|
||||
self._recovery_cooldown.pop(repo_name, None)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"Claude rebased {repo_name} but push failed: {e}")
|
||||
logger.warning(
|
||||
f"Claude exited clean but {repo_name} still diverged: {ahead2}↑ {behind2}↓ "
|
||||
f"— marking stalled (likely bailed on semantic conflicts)"
|
||||
f"— marking stalled for 24h (likely bailed on semantic conflicts)"
|
||||
)
|
||||
stall_entry.update(ahead=ahead2, behind=behind2, reason="claude_partial")
|
||||
self._recovery_cooldown[repo_name] = CLAUDE_STUCK_COOLDOWN_SEC
|
||||
else:
|
||||
logger.warning(f"Claude could not recover {repo_name} — marking stalled")
|
||||
logger.warning(f"Claude could not recover {repo_name} — marking stalled for 24h")
|
||||
stall_entry["reason"] = "claude_failed"
|
||||
self._recovery_cooldown[repo_name] = CLAUDE_STUCK_COOLDOWN_SEC
|
||||
|
||||
result.stalled_repos.append(stall_entry)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue