diff --git a/docs/architecture.md b/docs/architecture.md
index c695214..8b5bc2e 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -67,17 +67,21 @@ repos_base_paths = [
           │ HTTP
           ▼
 ┌─────────────────────┐
-│   llama-service     │ Port 8000
+│   llama-http        │ Port 10010
 │   (LLM inference)   │
 └─────────┬───────────┘
           │
           ▼
 ┌─────────────────────┐
-│  qwen2.5-1.5b       │ ~1.1GB model
-│  (commit messages)  │
+│ ministral-14b       │ reasoning model (analyze)
+│ ministral-3b        │ instruct model (format)
 └─────────────────────┘
 ```
 
+The service uses a multi-model approach:
+- **Reasoning model** (ministral-14b): Deep analysis of code changes
+- **Instruct model** (ministral-3b): Fast commit message formatting
+
 ## Cycle Flow
 
 The service uses a **per-repo atomic workflow**:
@@ -86,28 +90,59 @@ The service uses a **per-repo atomic workflow**:
 ┌─────────────────────────────────────────┐
 │              CYCLE LOOP                 │
 ├─────────────────────────────────────────┤
-│  repo-a: commit → push → done           │
-│  repo-b: commit → push → done           │
+│  repo-a: pipeline (ends in push) → done │
+│  repo-b: pipeline (ends in push) → done │
 │  repo-c: no changes → skip              │
-│  repo-d: commit → push → done           │
+│  repo-d: pipeline (ends in push) → done │
 │                 ↓                       │
 │         All repos processed             │
 │                 ↓                       │
+│         Persist commit history          │
+│                 ↓                       │
 │           Sleep X seconds               │
 │                 ↓                       │
 │            Next cycle                   │
 └─────────────────────────────────────────┘
 ```
 
+### Pipeline Stages
+
+For each repo with uncommitted changes, a 6-stage pipeline processes the working directory changes:
+
+```
+┌─────────────────────────────────────────────────────────────────────┐
+│                         COMMIT PIPELINE                             │
+├─────────────────────────────────────────────────────────────────────┤
+│                                                                     │
+│  1. DETECT     Find changed files in working directory              │
+│       ↓        (uncommitted changes, not yet git-staged)            │
+│                                                                     │
+│  2. GROUP      Cluster related files into logical commit batches    │
+│       ↓        (LLM groups by feature/purpose)                      │
+│                                                                     │
+│  3. ANALYZE    LLM reads each batch's diff to understand changes    │
+│       ↓        (what does this code change do?)                     │
+│                                                                     │
+│  4. FORMAT     Generate commit message from analysis                │
+│       ↓        (conventional commit format with emoji)              │
+│                                                                     │
+│  5. COMMIT     git add + git commit for each batch                  │
+│       ↓        (files are staged and committed here)                │
+│                                                                     │
+│  6. PUSH       Push commits to remote                               │
+│                (with conflict resolution if needed)                 │
+│                                                                     │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+**Terminology note**: "Analyzing commit 189/283" in logs means the LLM is analyzing the 189th batch of uncommitted changes. These are not yet git-staged or committed - that happens in stage 5.
+
 ### Per-Repo Processing
 For each repo:
-1. Check `git status --porcelain`
+1. Check `git status --porcelain` for uncommitted working directory changes
 2. Skip if no changes
-3. Get diff and send to llama-service for commit message
-4. Stage all changes (`git add -A`)
-5. Commit with generated message
-6. **Push immediately** to remote
-7. Move to next repo
+3. Run pipeline: detect → group → analyze → format → commit → push
+4. Move to next repo
 
 ### Cycle Completion
 When all repos processed:
@@ -121,6 +156,18 @@ When all repos processed:
 - **Progress visible**: Changes appear on remote as processed
 - **Fail-isolated**: One repo failing doesn't block others
 
+## Data Persistence
+
+Commit history is persisted to survive daemon restarts:
+
+| File | Location | Purpose |
+|------|----------|---------|
+| History | `~/.cache/commits/history.json` | Last 100 commits (hash, repo, timestamp) |
+| Activity | `~/.cache/commits/activity.jsonl` | Detailed activity log |
+| Database | `~/.cache/commits/auto_commit.db` | SQLite for structured queries |
+
+**Important**: History is only persisted when a cycle completes. If the daemon is interrupted mid-cycle (stuck hook, crash, etc.), commits made during that cycle won't appear in history, even though they already exist in the repository's git log.
+
 ## API Endpoints
 
 | Endpoint | Method | Purpose |
@@ -131,7 +178,8 @@ When all repos processed:
 | `/trigger` | POST | Manually trigger a commit cycle |
 | `/enable` | POST | Enable the daemon |
 | `/disable` | POST | Disable the daemon |
-| `/history` | GET | View commit history |
+| `/report/commits` | GET | View commit history |
+| `/report/summary` | GET | Comprehensive daemon report |
 
 ## Configuration
 
diff --git a/src/auto_commit_service/pipeline/cot/__init__.py b/src/auto_commit_service/pipeline/cot/__init__.py
new file mode 100644
index 0000000..afe197e
--- /dev/null
+++ b/src/auto_commit_service/pipeline/cot/__init__.py
@@ -0,0 +1,11 @@
+"""CoT (Chain-of-Thought) reasoning utilities.
+
+Provides prompt templates and response parsing for intelligent
+commit message generation using extended thinking.
+"""
+
+from .reasoner import CommitMessageReasoner
+
+__all__ = [
+    "CommitMessageReasoner",
+]
diff --git a/src/auto_commit_service/pipeline/cot/reasoner.py b/src/auto_commit_service/pipeline/cot/reasoner.py
new file mode 100644
index 0000000..af31e52
--- /dev/null
+++ b/src/auto_commit_service/pipeline/cot/reasoner.py
@@ -0,0 +1,297 @@
+"""CoT (Chain-of-Thought) reasoning utilities.
+
+Provides prompt templates and response parsing for commit message
+generation using extended thinking.
+"""
+
+import logging
+import re
+from typing import List
+
+from ..gitmoji import GITMOJI_MAP, get_all_types, get_valid_types_regex
+from ..models import CommitMessage, Component, Convention, FileGroup
+
+logger = logging.getLogger(__name__)
+
+
+class CommitMessageReasoner:
+    """Builds CoT prompts and parses reasoning results.
+
+    Handles:
+    - Prompt construction with RAG context
+    - Response parsing to extract commit message
+    - Reasoning step extraction
+    - Category and scope inference
+    """
+
+    COMMIT_TYPES = sorted(get_all_types(), key=len, reverse=True)
+
+    EMOJI_MAP = GITMOJI_MAP
+
+    def build_cot_prompt(
+        self,
+        file_group: FileGroup,
+        conventions: List[Convention],
+        codebase_context: List[Component],
+        include_examples: bool = True,
+    ) -> str:
+        """Build CoT prompt for commit message generation.
+
+        Args:
+            file_group: Files being committed, plus the grouping reasoning
+            conventions: Project conventions from RAG (first 3 are rendered)
+            codebase_context: Related components from RAG (first 5 are rendered)
+            include_examples: Whether to append the example commit messages
+
+        Returns:
+            Complete CoT prompt string, ending with the examples when enabled
+        """
+        # Render conventions, or a standard-format hint when there are none
+        conventions_section = self._format_conventions(conventions)
+
+        # Render related components as a bullet list
+        context_section = self._format_codebase_context(codebase_context)
+
+        # Assemble the prompt; everything inside the f-string is runtime text
+        prompt = f"""You are generating a commit message for the following changes.
+
+**Changed Files ({len(file_group.files)}):**
+{self._format_file_list(file_group.files)}
+
+**Grouping Reasoning:**
+{file_group.reasoning}
+
+**Project Commit Conventions:**
+{conventions_section}
+
+**Codebase Context:**
+{context_section}
+
+**Task:**
+Use chain-of-thought reasoning to determine the best commit message.
+Think through each step carefully:
+
+1. **Analyze the change type** (choose the MOST SPECIFIC):
+   - New functionality → feat | UI changes → ui | Accessibility → a11y
+   - Bug fix → fix | Critical hotfix → hotfix | Security/auth fix → security
+   - Config/tooling → chore | Dev scripts → scripts | Build system → build
+   - Restructuring code → refactor | Architecture changes → arch | Move/rename → move
+   - Documentation → docs | Types/interfaces → types | i18n → i18n
+   - Adding tests → test | CI pipeline → ci
+   - Performance → perf | Remove code → remove | Dead code → deadcode
+   - Add dependency → deps-add | Remove dep → deps-remove | Upgrade deps → deps-upgrade
+   - Database → db | Infrastructure → infra | Deployment → deploy
+   - Feature flags → flags | Logging → logs-add/logs-remove | Analytics → analytics
+
+2. **Determine the scope:**
+   - Which component/module is primarily affected?
+   - Look at the file paths and codebase context
+   - Choose a concise scope name (1-2 words)
+
+3. **Follow project conventions:**
+   - Review the conventions above
+   - Match the established style and format
+   - Use appropriate emojis if conventions show emoji usage
+
+4. **Choose the right emoji:**
+   - ✨ feat | 🐛 fix | 🚑️ hotfix | 🔒️ security
+   - ♻️ refactor | 🎨 style | ⚡ perf | 🔥 remove | ⚰️ deadcode
+   - 📝 docs | 🔧 chore | 🔨 scripts
+   - ➕ deps-add | ➖ deps-remove | ⬆️ deps-upgrade | ⬇️ deps-downgrade | 📌 deps-pin
+   - ✅ test | 👷 ci | 📦️ build
+   - 🗃️ db | 🧱 infra | 🚀 deploy
+   - 💄 ui | 📱 responsive | ♿️ a11y | 🚸 ux
+   - 🏗️ arch | 🚚 move | 💥 breaking
+
+5. **Write a concise description:**
+   - Focus on WHAT changed in the FILES and WHY
+   - Describe the actual file changes (code, docs, config)
+   - NOT your reasoning process or analysis steps
+   - Don't describe HOW (that's in the code)
+   - Keep it under 50 characters if possible
+   - Use imperative mood ("Add" not "Added")
+
+**CRITICAL Output Format:**
+After your reasoning, you MUST output the final commit message on a new line starting with "COMMIT MESSAGE:"
+
+The commit message must describe the actual file changes, not your thinking.
+
+Example format: `type(scope): emoji description`
+Example: `feat(auth): ✨ Add OAuth2 login support`
+
+Now reason through the steps above and generate the commit message."""
+
+        if include_examples:
+            prompt += self._add_examples_section()
+
+        return prompt
+
+    def parse_cot_response(
+        self,
+        response: str,
+        thinking: str | None,
+        file_group: FileGroup,
+    ) -> CommitMessage:
+        """Turn a raw CoT response into a structured CommitMessage.
+
+        Args:
+            response: Full response text from the model
+            thinking: Extended thinking output; when empty or None the
+                reasoning is recovered from ``response`` instead
+            file_group: File group the message was generated for
+
+        Returns:
+            CommitMessage carrying the files, message, reasoning,
+            inferred category and inferred scope
+
+        Raises:
+            RuntimeError: If no commit message can be extracted
+        """
+        commit_line = self._extract_commit_message(response)
+        rationale = thinking if thinking else self._extract_reasoning_from_response(response)
+        return CommitMessage(
+            files=file_group.files,
+            message=commit_line,
+            reasoning=rationale,
+            category=self._infer_category(commit_line),
+            scope=self._infer_scope(commit_line),
+        )
+
+    def _format_conventions(self, conventions: List[Convention]) -> str:
+        """Format up to 3 conventions; content over 400 chars is cut and marked with "..."."""
+        if not conventions:
+            return "No specific conventions found - use standard format: `type(scope): description`"
+
+        parts = []
+        for idx, conv in enumerate(conventions[:3], 1):
+            body = conv.content if len(conv.content) <= 400 else f"{conv.content[:400]}..."
+            parts.append(
+                f"\n**Convention {idx}** (from {conv.source}, relevance: {conv.relevance_score:.2f})\n{body}"
+            )
+        return "".join(parts)
+
+    def _format_codebase_context(self, components: List[Component]) -> str:
+        """Render at most five components as markdown bullets for the prompt."""
+        if not components:
+            return "No codebase context available"
+
+        # Descriptions are clipped to 100 chars; missing ones render as "N/A".
+        bullets = (
+            f"- **{item.name}** ({item.type}): {item.description[:100] if item.description else 'N/A'}"
+            for item in components[:5]
+        )
+        return "\n".join(bullets)
+
+    def _format_file_list(self, files: List[str]) -> str:
+        """Render each path as a markdown bullet, one per line."""
+        return "\n".join(map("- {}".format, files))
+
+    def _add_examples_section(self) -> str:
+        """Return the example commit messages block that callers append to the prompt."""
+        return """
+
+**Good Commit Message Examples:**
+- `feat(auth): ✨ Add OAuth2 login support`
+- `fix(api): 🐛 Fix null pointer in user endpoint`
+- `deps-upgrade(npm): ⬆️ Bump vite from 5.2 to 6.0`
+- `security(auth): 🔒️ Sanitize JWT token claims`
+- `db(migrations): 🗃️ Add index on users.email column`
+- `deploy(k8s): 🚀 Add horizontal pod autoscaler`
+- `perf(queries): ⚡ Cache frequently accessed lookups`
+- `ci(github): 👷 Add parallel test matrix`
+- `refactor(db): ♻️ Simplify query builder`
+- `arch(pipeline): 🏗️ Extract stage interface from orchestrator`
+- `ui(dashboard): 💄 Redesign metrics cards layout`
+- `test(auth): ✅ Add OAuth2 integration tests`
+"""
+
+    def _extract_commit_message(self, response: str) -> str:
+        """Return the final commit line from an LLM response; raise RuntimeError if none is found."""
+        # Reasoning patterns to exclude from commit messages
+        reasoning_patterns = [
+            r'\*\*',  # Markdown bold markers - reasoning text often uses these
+            r'chain.?of.?thought',  # Explicit CoT markers
+            r'reasoning',  # Explicit reasoning markers
+            r'let(\s+me|\s?\'?s)\s+(analyze|think|consider|break|examine)',
+            r'step\s+by\s+step',
+            r'first,?\s+',
+            r'to\s+(understand|determine|identify)',
+            r'based\s+on\s+the',
+            r'^\d+\.',  # Numbered lists; anchored so versions like "5.2" in a message pass
+            r'analyze\s+the\s+(change|type|scope)',
+        ]
+
+        # Look for "COMMIT MESSAGE:" marker
+        if "COMMIT MESSAGE:" in response:
+            lines = response.split("COMMIT MESSAGE:", 1)[1].strip().splitlines()
+            for line in lines:
+                line = line.strip()
+                if not line or line.startswith("#") or line.startswith("-"):
+                    continue
+                # Check if line contains reasoning patterns
+                if any(re.search(pattern, line, re.IGNORECASE) for pattern in reasoning_patterns):
+                    continue
+                return line
+
+        # Fallback: look for pattern like "type(scope): emoji description"
+        # Test each line against the full pattern; re.findall() with a capture
+        # group would only yield the bare type, which also appears in prose.
+        message_re = re.compile(
+            rf"(?:{get_valid_types_regex()})\([^)]+\):\s*\S", re.IGNORECASE
+        )
+        for line in response.splitlines():
+            line = line.strip()
+            if not message_re.search(line):
+                continue
+            if not any(re.search(p, line, re.IGNORECASE) for p in reasoning_patterns):
+                return line
+
+        # No valid commit message found - this is a failure
+        lines = [ln.strip() for ln in response.splitlines() if ln.strip()]
+        raise RuntimeError(
+            f"Failed to extract valid commit message from LLM response. "
+            f"Response had {len(lines)} lines but none matched conventional commit format. "
+            f"First line: {lines[0][:100] if lines else 'empty response'}"
+        )
+
+    def _extract_reasoning_from_response(self, response: str) -> str:
+        """Return the text preceding the commit marker, else the first 500 chars."""
+        marker = "COMMIT MESSAGE:"
+        before, found, _ = response.partition(marker)
+        if found:
+            # Everything ahead of the first marker is treated as reasoning.
+            return before.strip()
+        return response[:500]
+
+    def _infer_category(self, message: str) -> str | None:
+        """Return the first known commit type that prefixes the message, if any."""
+        lowered = message.lower()
+        # COMMIT_TYPES is sorted longest-first, so a longer type wins over a
+        # shorter one that shares its prefix.
+        return next(
+            (kind for kind in self.COMMIT_TYPES if lowered.startswith(kind)),
+            None,
+        )
+
+    def _infer_scope(self, message: str) -> str | None:
+        """Return the scope from a leading ``type(scope):`` prefix, or None if absent."""
+        # Types such as "deps-upgrade", "a11y" or "i18n" contain hyphens and
+        # digits, so the type part must accept more than plain letters.
+        match = re.match(r"[a-z][a-z0-9-]*\(([^)]+)\)!?:", message, re.IGNORECASE)
+        if match:
+            return match.group(1)
+        return None
+
+    def suggest_emoji(self, category: str | None) -> str:
+        """Look up the gitmoji for a commit category.
+
+        Args:
+            category: Commit category (feat, fix, etc.), or None
+
+        Returns:
+            The mapped emoji, or the chore emoji when the category is
+            missing or unknown
+        """
+        if not category or category not in self.EMOJI_MAP:
+            return "🔧"  # Default to chore emoji
+        return self.EMOJI_MAP[category]
diff --git a/tests/conftest.py b/tests/conftest.py
index 8ef03fe..6f819d6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,12 +1,56 @@
 """Pytest fixtures for auto-commit-service tests."""
 
-import asyncio
+import json
+import logging
 import subprocess
+import time
+import urllib.request
 from pathlib import Path
 from typing import Generator
 
 import pytest
 
+logger = logging.getLogger(__name__)
+
+COORDINATOR_URL = "http://localhost:8210"
+MAX_WAIT_SECONDS = 120
+POLL_INTERVAL_SECONDS = 3
+
+
+def _coordinator_healthy() -> bool:
+    """Return True only if GET {COORDINATOR_URL}/health answers 200; any exception yields False."""
+    health_url = f"{COORDINATOR_URL}/health"
+    try:
+        with urllib.request.urlopen(urllib.request.Request(health_url, method="GET"), timeout=5) as reply:
+            return reply.status == 200
+    except Exception:
+        return False
+
+
+@pytest.fixture(scope="session")
+def gpu_services() -> Generator[dict[str, str], None, None]:
+    """Session fixture that fails fast unless the model-boss coordinator is up.
+
+    The coordinator handles model loading, VRAM management, and health monitoring.
+    On success yields ``{"coordinator": COORDINATOR_URL}``; there is no teardown.
+    """
+    reachable = _coordinator_healthy()
+    if not reachable:
+        hint = "Start it with: systemctl --user start model-boss-coordinator.service"
+        pytest.fail(f"model-boss coordinator not reachable at {COORDINATOR_URL}. {hint}")
+
+    services = {"coordinator": COORDINATOR_URL}
+    logger.info("gpu_services: coordinator healthy at %s", COORDINATOR_URL)
+    yield services
+
+
+def pytest_configure(config: pytest.Config) -> None:
+    """Register the custom ``gpu`` marker with pytest."""
+    gpu_marker = (
+        "gpu: marks tests as requiring GPU/llama-http service (deselect with '-m \"not gpu\"')"
+    )
+    config.addinivalue_line("markers", gpu_marker)
+
 
 @pytest.fixture
 def temp_git_repo(tmp_path: Path) -> Generator[Path, None, None]:
@@ -71,13 +115,28 @@ index 0000000..1234567
 
 @pytest.fixture
 def mock_settings():
-    """Create test settings."""
+    """Create unit-test settings (no GPU required): 1s cycle, no auto-start, no Claude fallback."""
     from auto_commit_service.config import AutoCommitSettings
 
     return AutoCommitSettings(
         service_name="test-auto-commit",
-        llama_service_url="http://localhost:8000",
+        reasoning_model_id="ministral-14b-reasoning",
+        instruct_model_id="ministral-3b-instruct",
+        llm_timeout=30.0,
         cycle_interval_seconds=1,  # Fast for tests
         claude_fallback_enabled=False,
         enabled=False,  # Don't auto-start
     )
+
+
+@pytest.fixture
+def gpu_settings():
+    """Build the AutoCommitSettings used by GPU integration tests."""
+    from auto_commit_service.config import AutoCommitSettings
+    overrides = {
+        "service_name": "gpu-integration-test",
+        "reasoning_model_id": "ministral-14b-reasoning",
+        "instruct_model_id": "ministral-3b-instruct",
+        "llm_timeout": 30.0,  # Shorter timeout for fail-fast
+    }
+    return AutoCommitSettings(**overrides)