diff --git a/src/auto_commit_service/tray/app.py b/src/auto_commit_service/tray/app.py index 6bbcc49..67a15f4 100644 --- a/src/auto_commit_service/tray/app.py +++ b/src/auto_commit_service/tray/app.py @@ -73,6 +73,9 @@ class CommitsTrayApp(rumps.App): self._local_repos: list[Path] = [] self._repos_discovered = False + # Track stalled-repo transitions so we notify once per edge + self._last_stalled_set: set[str] = set() + # Build menu self._status_item = rumps.MenuItem("Status: starting...", callback=None) self._status_item.set_callback(None) @@ -132,6 +135,37 @@ class CommitsTrayApp(rumps.App): last = self.agent.last_cycle stalled = list(last.stalled_repos) if last else [] + # Notify on transitions into/out of stalled state (once per edge). + new_stalled = {e["repo_name"] for e in stalled} + newly_stalled = new_stalled - self._last_stalled_set + recovered = self._last_stalled_set - new_stalled + + for repo_name in sorted(newly_stalled): + entry = next((e for e in stalled if e["repo_name"] == repo_name), {}) + ahead = entry.get("ahead", 0) + behind = entry.get("behind", 0) + reason = entry.get("reason", "diverged") + try: + rumps.notification( + title="ACS stalled", + subtitle=repo_name, + message=f"{ahead}↑ {behind}↓ ({reason}) — needs human attention", + ) + except Exception as exc: + logger.warning("Failed to post stalled notification for %s: %s", repo_name, exc) + + for repo_name in sorted(recovered): + try: + rumps.notification( + title="ACS recovered", + subtitle=repo_name, + message="back in sync", + ) + except Exception as exc: + logger.warning("Failed to post recovery notification for %s: %s", repo_name, exc) + + self._last_stalled_set = new_stalled + # Icon if not agent_running: icon = ICON_ERROR diff --git a/src/auto_commit_service/tray/local_agent.py b/src/auto_commit_service/tray/local_agent.py index 8e12439..6fa9024 100644 --- a/src/auto_commit_service/tray/local_agent.py +++ b/src/auto_commit_service/tray/local_agent.py @@ -9,7 +9,9 @@ Runs as a background thread managed by the tray app. from __future__ import annotations +import json import logging +import os import socket import subprocess import threading @@ -75,6 +77,7 @@ class LocalCommitAgent: repos_paths: list[Path], cycle_seconds: int = 300, co_author: str = "Lilith Autocommit ", + recovery_state_path: Path | None = None, ): self.acs_url = acs_url.rstrip("/") self.repos_paths = repos_paths @@ -89,10 +92,62 @@ class LocalCommitAgent: self._repos: list[Path] = [] self._last_cycle: CycleResult | None = None self._total_cycles = 0 - self._last_recovery_at: dict[str, float] = {} # repo_name -> monotonic ts + self._recovery_state_path_override = recovery_state_path + self._last_recovery_at: dict[str, float] = {} # repo_name -> wall-clock UNIX ts # repo_name -> required cooldown for *next* attempt. Starts at 1h, bumps # to 24h after Claude couldn't/wouldn't resolve. Resets on clean success. self._recovery_cooldown: dict[str, float] = {} + self._load_recovery_state() + + def _recovery_state_path(self) -> Path: + if self._recovery_state_path_override is not None: + return self._recovery_state_path_override + return Path.home() / "Library" / "Application Support" / "Commits" / "recovery.json" + + def _load_recovery_state(self) -> None: + path = self._recovery_state_path() + try: + with path.open("r", encoding="utf-8") as fh: + data = json.load(fh) + except FileNotFoundError: + logger.debug(f"Recovery state file not found at {path}; starting fresh") + return + except (OSError, json.JSONDecodeError) as e: + logger.debug(f"Recovery state unreadable at {path}: {e}; starting fresh") + return + + if not isinstance(data, dict): + logger.debug(f"Recovery state at {path} is not a dict; starting fresh") + return + + for repo_name, entry in data.items(): + if not isinstance(entry, dict): + continue + last_attempt = entry.get("last_attempt") + cooldown_sec = entry.get("cooldown_sec") + if isinstance(last_attempt, (int, float)): + self._last_recovery_at[repo_name] = float(last_attempt) + if isinstance(cooldown_sec, (int, float)): + self._recovery_cooldown[repo_name] = float(cooldown_sec) + + def _save_recovery_state(self) -> None: + path = self._recovery_state_path() + payload: dict[str, dict[str, float]] = {} + for repo_name, last_attempt in self._last_recovery_at.items(): + payload[repo_name] = { + "last_attempt": last_attempt, + "cooldown_sec": self._recovery_cooldown.get( + repo_name, CLAUDE_RECOVERY_COOLDOWN_SEC + ), + } + try: + path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = path.with_suffix(path.suffix + ".tmp") + with tmp_path.open("w", encoding="utf-8") as fh: + json.dump(payload, fh, indent=2, sort_keys=True) + os.replace(tmp_path, path) + except OSError as e: + logger.warning(f"Failed to persist recovery state to {path}: {e}") @property def is_running(self) -> bool: @@ -327,7 +382,7 @@ class LocalCommitAgent: result: CycleResult, ) -> None: """Invoke Claude Code (rate-limited) to reconcile divergence.""" - now = time.monotonic() + now = time.time() last = self._last_recovery_at.get(repo_name, 0.0) required = self._recovery_cooldown.get(repo_name, CLAUDE_RECOVERY_COOLDOWN_SEC) cooled_down = (now - last) >= required @@ -338,7 +393,7 @@ class LocalCommitAgent: "ahead": ahead, "behind": behind, "last_attempt": ( - datetime.fromtimestamp(time.time() - (now - last), timezone.utc).isoformat() + datetime.fromtimestamp(last, timezone.utc).isoformat() if last else None ), } @@ -352,6 +407,7 @@ class LocalCommitAgent: return self._last_recovery_at[repo_name] = now + self._save_recovery_state() logger.warning( f"Diverged {repo_name}: {ahead}↑ {behind}↓ — invoking claude-code for recovery" ) @@ -362,12 +418,16 @@ class LocalCommitAgent: if ahead2 == 0 and behind2 == 0: logger.info(f"Claude resolved {repo_name} cleanly") self._recovery_cooldown.pop(repo_name, None) + self._last_recovery_at.pop(repo_name, None) + self._save_recovery_state() return if behind2 == 0 and ahead2 > 0: try: _git(repo_path, "push") logger.info(f"Claude rebased {repo_name}; push succeeded") self._recovery_cooldown.pop(repo_name, None) + self._last_recovery_at.pop(repo_name, None) + self._save_recovery_state() return except Exception as e: logger.error(f"Claude rebased {repo_name} but push failed: {e}") @@ -377,10 +437,12 @@ class LocalCommitAgent: ) stall_entry.update(ahead=ahead2, behind=behind2, reason="claude_partial") self._recovery_cooldown[repo_name] = CLAUDE_STUCK_COOLDOWN_SEC + self._save_recovery_state() else: logger.warning(f"Claude could not recover {repo_name} — marking stalled for 24h") stall_entry["reason"] = "claude_failed" self._recovery_cooldown[repo_name] = CLAUDE_STUCK_COOLDOWN_SEC + self._save_recovery_state() result.stalled_repos.append(stall_entry)