feat(@ml/auto-commit-service): ✨ add stalled repo notifications
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
3b7be20bce
commit
a4a2f8dd82
2 changed files with 99 additions and 3 deletions
|
|
@ -73,6 +73,9 @@ class CommitsTrayApp(rumps.App):
|
|||
self._local_repos: list[Path] = []
|
||||
self._repos_discovered = False
|
||||
|
||||
# Track stalled-repo transitions so we notify once per edge
|
||||
self._last_stalled_set: set[str] = set()
|
||||
|
||||
# Build menu
|
||||
self._status_item = rumps.MenuItem("Status: starting...", callback=None)
|
||||
self._status_item.set_callback(None)
|
||||
|
|
@ -132,6 +135,37 @@ class CommitsTrayApp(rumps.App):
|
|||
last = self.agent.last_cycle
|
||||
stalled = list(last.stalled_repos) if last else []
|
||||
|
||||
# Notify on transitions into/out of stalled state (once per edge).
|
||||
new_stalled = {e["repo_name"] for e in stalled}
|
||||
newly_stalled = new_stalled - self._last_stalled_set
|
||||
recovered = self._last_stalled_set - new_stalled
|
||||
|
||||
for repo_name in sorted(newly_stalled):
|
||||
entry = next((e for e in stalled if e["repo_name"] == repo_name), {})
|
||||
ahead = entry.get("ahead", 0)
|
||||
behind = entry.get("behind", 0)
|
||||
reason = entry.get("reason", "diverged")
|
||||
try:
|
||||
rumps.notification(
|
||||
title="ACS stalled",
|
||||
subtitle=repo_name,
|
||||
message=f"{ahead}↑ {behind}↓ ({reason}) — needs human attention",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to post stalled notification for %s: %s", repo_name, exc)
|
||||
|
||||
for repo_name in sorted(recovered):
|
||||
try:
|
||||
rumps.notification(
|
||||
title="ACS recovered",
|
||||
subtitle=repo_name,
|
||||
message="back in sync",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to post recovery notification for %s: %s", repo_name, exc)
|
||||
|
||||
self._last_stalled_set = new_stalled
|
||||
|
||||
# Icon
|
||||
if not agent_running:
|
||||
icon = ICON_ERROR
|
||||
|
|
|
|||
|
|
@ -9,7 +9,9 @@ Runs as a background thread managed by the tray app.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import threading
|
||||
|
|
@ -75,6 +77,7 @@ class LocalCommitAgent:
|
|||
repos_paths: list[Path],
|
||||
cycle_seconds: int = 300,
|
||||
co_author: str = "Lilith Autocommit <noreply@atlilith.com>",
|
||||
recovery_state_path: Path | None = None,
|
||||
):
|
||||
self.acs_url = acs_url.rstrip("/")
|
||||
self.repos_paths = repos_paths
|
||||
|
|
@ -89,10 +92,62 @@ class LocalCommitAgent:
|
|||
self._repos: list[Path] = []
|
||||
self._last_cycle: CycleResult | None = None
|
||||
self._total_cycles = 0
|
||||
self._last_recovery_at: dict[str, float] = {} # repo_name -> monotonic ts
|
||||
self._recovery_state_path_override = recovery_state_path
|
||||
self._last_recovery_at: dict[str, float] = {} # repo_name -> wall-clock UNIX ts
|
||||
# repo_name -> required cooldown for *next* attempt. Starts at 1h, bumps
|
||||
# to 24h after Claude couldn't/wouldn't resolve. Resets on clean success.
|
||||
self._recovery_cooldown: dict[str, float] = {}
|
||||
self._load_recovery_state()
|
||||
|
||||
def _recovery_state_path(self) -> Path:
|
||||
if self._recovery_state_path_override is not None:
|
||||
return self._recovery_state_path_override
|
||||
return Path.home() / "Library" / "Application Support" / "Commits" / "recovery.json"
|
||||
|
||||
def _load_recovery_state(self) -> None:
|
||||
path = self._recovery_state_path()
|
||||
try:
|
||||
with path.open("r", encoding="utf-8") as fh:
|
||||
data = json.load(fh)
|
||||
except FileNotFoundError:
|
||||
logger.debug(f"Recovery state file not found at {path}; starting fresh")
|
||||
return
|
||||
except (OSError, json.JSONDecodeError) as e:
|
||||
logger.debug(f"Recovery state unreadable at {path}: {e}; starting fresh")
|
||||
return
|
||||
|
||||
if not isinstance(data, dict):
|
||||
logger.debug(f"Recovery state at {path} is not a dict; starting fresh")
|
||||
return
|
||||
|
||||
for repo_name, entry in data.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
last_attempt = entry.get("last_attempt")
|
||||
cooldown_sec = entry.get("cooldown_sec")
|
||||
if isinstance(last_attempt, (int, float)):
|
||||
self._last_recovery_at[repo_name] = float(last_attempt)
|
||||
if isinstance(cooldown_sec, (int, float)):
|
||||
self._recovery_cooldown[repo_name] = float(cooldown_sec)
|
||||
|
||||
def _save_recovery_state(self) -> None:
|
||||
path = self._recovery_state_path()
|
||||
payload: dict[str, dict[str, float]] = {}
|
||||
for repo_name, last_attempt in self._last_recovery_at.items():
|
||||
payload[repo_name] = {
|
||||
"last_attempt": last_attempt,
|
||||
"cooldown_sec": self._recovery_cooldown.get(
|
||||
repo_name, CLAUDE_RECOVERY_COOLDOWN_SEC
|
||||
),
|
||||
}
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = path.with_suffix(path.suffix + ".tmp")
|
||||
with tmp_path.open("w", encoding="utf-8") as fh:
|
||||
json.dump(payload, fh, indent=2, sort_keys=True)
|
||||
os.replace(tmp_path, path)
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to persist recovery state to {path}: {e}")
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
|
|
@ -327,7 +382,7 @@ class LocalCommitAgent:
|
|||
result: CycleResult,
|
||||
) -> None:
|
||||
"""Invoke Claude Code (rate-limited) to reconcile divergence."""
|
||||
now = time.monotonic()
|
||||
now = time.time()
|
||||
last = self._last_recovery_at.get(repo_name, 0.0)
|
||||
required = self._recovery_cooldown.get(repo_name, CLAUDE_RECOVERY_COOLDOWN_SEC)
|
||||
cooled_down = (now - last) >= required
|
||||
|
|
@ -338,7 +393,7 @@ class LocalCommitAgent:
|
|||
"ahead": ahead,
|
||||
"behind": behind,
|
||||
"last_attempt": (
|
||||
datetime.fromtimestamp(time.time() - (now - last), timezone.utc).isoformat()
|
||||
datetime.fromtimestamp(last, timezone.utc).isoformat()
|
||||
if last else None
|
||||
),
|
||||
}
|
||||
|
|
@ -352,6 +407,7 @@ class LocalCommitAgent:
|
|||
return
|
||||
|
||||
self._last_recovery_at[repo_name] = now
|
||||
self._save_recovery_state()
|
||||
logger.warning(
|
||||
f"Diverged {repo_name}: {ahead}↑ {behind}↓ — invoking claude-code for recovery"
|
||||
)
|
||||
|
|
@ -362,12 +418,16 @@ class LocalCommitAgent:
|
|||
if ahead2 == 0 and behind2 == 0:
|
||||
logger.info(f"Claude resolved {repo_name} cleanly")
|
||||
self._recovery_cooldown.pop(repo_name, None)
|
||||
self._last_recovery_at.pop(repo_name, None)
|
||||
self._save_recovery_state()
|
||||
return
|
||||
if behind2 == 0 and ahead2 > 0:
|
||||
try:
|
||||
_git(repo_path, "push")
|
||||
logger.info(f"Claude rebased {repo_name}; push succeeded")
|
||||
self._recovery_cooldown.pop(repo_name, None)
|
||||
self._last_recovery_at.pop(repo_name, None)
|
||||
self._save_recovery_state()
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"Claude rebased {repo_name} but push failed: {e}")
|
||||
|
|
@ -377,10 +437,12 @@ class LocalCommitAgent:
|
|||
)
|
||||
stall_entry.update(ahead=ahead2, behind=behind2, reason="claude_partial")
|
||||
self._recovery_cooldown[repo_name] = CLAUDE_STUCK_COOLDOWN_SEC
|
||||
self._save_recovery_state()
|
||||
else:
|
||||
logger.warning(f"Claude could not recover {repo_name} — marking stalled for 24h")
|
||||
stall_entry["reason"] = "claude_failed"
|
||||
self._recovery_cooldown[repo_name] = CLAUDE_STUCK_COOLDOWN_SEC
|
||||
self._save_recovery_state()
|
||||
|
||||
result.stalled_repos.append(stall_entry)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue