diff --git a/src/auto_commit_service/app.py b/src/auto_commit_service/app.py
index 71b90c0..862ae1b 100644
--- a/src/auto_commit_service/app.py
+++ b/src/auto_commit_service/app.py
@@ -147,6 +147,10 @@ def create_auto_commit_service(
             llama_service_available=llama_available,
             repos_accessible=repos_accessible,
             error=error,
+            llama_service_crashed=service_crashed,
+            llama_service_restart_attempts=daemon.service_restart_attempts,
+            llama_service_last_crash=daemon.service_last_crash_time,
+            llama_service_last_successful_restart=daemon.service_last_successful_restart,
         )
 
     @app.get("/status", response_model=DaemonStatus)
@@ -162,6 +166,9 @@ def create_auto_commit_service(
             service_crashed=daemon.service_crashed,
             service_health=daemon.service_health,
             last_health_check=daemon.last_health_check,
+            service_restart_attempts=daemon.service_restart_attempts,
+            service_last_crash_time=daemon.service_last_crash_time,
+            service_last_successful_restart=daemon.service_last_successful_restart,
         )
 
     @app.post("/trigger", response_model=TriggerResponse)
diff --git a/src/auto_commit_service/config.py b/src/auto_commit_service/config.py
index 278ebd2..48330a7 100644
--- a/src/auto_commit_service/config.py
+++ b/src/auto_commit_service/config.py
@@ -121,6 +121,14 @@ class AutoCommitSettings(BaseServiceSettings):
         default=0,
         description="Cycles between health checks (0 = check every cycle)",
     )
+    llama_service_max_restart_attempts: int = Field(
+        default=3,
+        description="Maximum restart attempts before giving up",
+    )
+    llama_service_restart_backoff_seconds: float = Field(
+        default=5.0,
+        description="Delay between restart attempts (seconds)",
+    )
 
     # Model-boss integration for auto-loading LLM
     llama_model_id: str = Field(
diff --git a/src/auto_commit_service/models.py b/src/auto_commit_service/models.py
index 5ab515f..8ab583b 100644
--- a/src/auto_commit_service/models.py
+++ b/src/auto_commit_service/models.py
@@ -52,6 +52,9 @@ class DaemonStatus(BaseModel):
     service_crashed: bool = False
     service_health: str | None = None
     last_health_check: datetime | None = None
+    service_restart_attempts: int = 0
+    service_last_crash_time: datetime | None = None
+    service_last_successful_restart: datetime | None = None
 
 
 class HealthResponse(BaseModel):
@@ -62,6 +65,10 @@ class HealthResponse(BaseModel):
     llama_service_available: bool
     repos_accessible: bool
     error: str | None = None
+    llama_service_crashed: bool = False
+    llama_service_restart_attempts: int = 0
+    llama_service_last_crash: datetime | None = None
+    llama_service_last_successful_restart: datetime | None = None
 
 
 class TriggerResponse(BaseModel):
diff --git a/src/auto_commit_service/scheduler/daemon.py b/src/auto_commit_service/scheduler/daemon.py
index 1f28de6..7886387 100644
--- a/src/auto_commit_service/scheduler/daemon.py
+++ b/src/auto_commit_service/scheduler/daemon.py
@@ -91,6 +91,9 @@ class CommitDaemon:
         self._service_health: ServiceHealth | None = None
         self._last_health_check: datetime | None = None
         self._last_health_check_cycle = 0  # Track cycles since last check
+        self._service_restart_attempts = 0
+        self._last_crash_time: datetime | None = None
+        self._last_successful_restart: datetime | None = None
 
     def _build_repos(self) -> list[Repository]:
         """Build the list of repositories to process."""
@@ -380,6 +383,21 @@ class CommitDaemon:
         """Get the last health check timestamp."""
         return self._last_health_check
 
+    @property
+    def service_restart_attempts(self) -> int:
+        """Get the restart attempt count (reset to 0 once the service is healthy again)."""
+        return self._service_restart_attempts
+
+    @property
+    def service_last_crash_time(self) -> datetime | None:
+        """Get when the service was last found crashed or unreachable (None if never recorded)."""
+        return self._last_crash_time
+
+    @property
+    def service_last_successful_restart(self) -> datetime | None:
+        """Get when a restart attempt last succeeded (None if none has succeeded yet)."""
+        return self._last_successful_restart
+
     async def _ensure_service_ready(self) -> bool:
         """Ensure llama service is available, starting if needed.
 
@@ -404,19 +422,47 @@ class CommitDaemon:
 
             if health == ServiceHealth.CRASHED or health == ServiceHealth.UNREACHABLE:
                 status_msg = "crashed (stale PID)" if health == ServiceHealth.CRASHED else "unreachable"
-                logger.info(f"Llama service {status_msg}, attempting to start...")
-                started = await self.service_manager.ensure_service_available()
-                if started:
-                    self._service_crashed = False
-                    self._service_health = ServiceHealth.HEALTHY
-                    return True
-                else:
-                    self._service_crashed = True
-                    logger.error("Failed to start llama service")
-                    return False
+                self._last_crash_time = datetime.now()
+
+                logger.warning(
+                    f"Llama service {status_msg}, attempting restart "
+                    f"(attempt 1/{self.settings.llama_service_max_restart_attempts})"
+                )
+
+                # Retry with linearly increasing backoff (base delay * attempt number)
+                for attempt in range(1, self.settings.llama_service_max_restart_attempts + 1):
+                    self._service_restart_attempts = attempt
+
+                    started = await self.service_manager.ensure_service_available()
+                    if started:
+                        self._service_crashed = False
+                        self._service_health = ServiceHealth.HEALTHY
+                        self._service_restart_attempts = 0
+                        self._last_successful_restart = datetime.now()
+                        logger.info(f"✓ Llama service restarted successfully (attempt {attempt}/{self.settings.llama_service_max_restart_attempts})")
+                        return True
+
+                    # Failed this attempt
+                    if attempt < self.settings.llama_service_max_restart_attempts:
+                        backoff = self.settings.llama_service_restart_backoff_seconds * attempt
+                        logger.warning(
+                            f"✗ Restart attempt {attempt}/{self.settings.llama_service_max_restart_attempts} failed, "
+                            f"retrying in {backoff:.1f}s..."
+                        )
+                        await asyncio.sleep(backoff)
+                    else:
+                        logger.error(
+                            f"✗ Failed to restart llama service after {self.settings.llama_service_max_restart_attempts} attempts"
+                        )
+
+                # All attempts failed
+                self._service_crashed = True
+                self._service_restart_attempts = self.settings.llama_service_max_restart_attempts
+                return False
 
             # Service is healthy
             self._service_crashed = False
+            self._service_restart_attempts = 0
             return True
         else:
             # Skip health check this cycle