platform-tooling/run/core/state-tracker.ts
Quinn Ftw 85621b287e chore: snapshot before monorepo consolidation
Capture current working state before converting platform-tooling
into a submodule of the lilith-platform monorepo.
2026-01-29 07:04:39 -08:00

266 lines
6.2 KiB
TypeScript

/**
* Dev cluster state tracking for freeze detection
*
* Provides:
* - Atomic state file writes for external inspection
* - Heartbeat tracking (updated every 2s)
* - Phase and progress tracking
* - Process and health check metadata
*/
import { writeFile, rename } from 'node:fs/promises';
import type { ContainerStatus } from './docker.js';
// =============================================================================
// Types
// =============================================================================
/**
 * Shape of the document that `DevStateTracker` serializes to its JSON state
 * file for external inspection.
 *
 * Timestamp fields populated by the tracker (`startedAt`, `lastHeartbeat`,
 * `lastCheckAt`, `lastCheck`) hold `Date.now()` values, i.e. milliseconds
 * since the Unix epoch.
 */
export interface DevClusterState {
  /** Version tag of this state document format. */
  version: '1.0.0';
  /** PID of the process that owns the tracker (`process.pid`). */
  orchestratorPid: number;
  /** When the tracker was constructed. */
  startedAt: number;
  /** Refreshed immediately before every write of the state file. */
  lastHeartbeat: number;

  /** The phase the orchestrator is currently in. */
  phase: {
    index: number;
    name: string;
    /** When this phase was entered. */
    startedAt: number;
    /** Optional duration hint; unit is not fixed here (presumably ms, to be confirmed with callers). */
    expectedDuration?: number;
  };

  /** Service start-up counters. */
  progress: {
    servicesTotal: number;
    servicesStarted: number;
    servicesFailed: number;
    /** Service being worked on right now, when one was reported. */
    currentService?: string;
  };

  /** Metadata about child processes; each entry is absent until it has been tracked. */
  children: {
    docker?: {
      pid?: number;
      command: string;
      startedAt: number;
    };
    pm2?: {
      daemonPid?: number;
      services: {
        id: string;
        pid?: number;
        status: string;
      }[];
    };
    migrations?: {
      pid?: number;
      startedAt?: number;
    };
  };

  /** Most recent health-check observations. */
  healthChecks: {
    docker: {
      /** 0 until the first Docker health update is recorded. */
      lastCheckAt: number;
      containers: {
        name: string;
        health?: string;
        status: string;
      }[];
    };
    services: {
      /** 0 until the first service health update is recorded. */
      lastCheckAt: number;
      /** Latest result per service, keyed by service id. */
      results: Record<
        string,
        {
          healthy: boolean;
          lastCheck: number;
        }
      >;
    };
  };
}
// =============================================================================
// DevStateTracker
// =============================================================================
/**
 * Tracks dev-cluster orchestration state in memory and persists it to a JSON
 * file so that external tools can inspect progress and detect a frozen
 * orchestrator (a stale `lastHeartbeat`).
 *
 * The `set*` / `update*` / `track*` methods only change the in-memory state;
 * it reaches disk on the next heartbeat write (every 2s after `start()`) or
 * on the final write performed by `stop()`.
 */
export class DevStateTracker {
  /** Interval between heartbeat writes, in milliseconds. */
  private static readonly HEARTBEAT_INTERVAL_MS = 2000;

  private state: DevClusterState;
  private heartbeatTimer: NodeJS.Timeout | null = null;
  private readonly statePath: string;
  private readonly tmpPath: string;
  private stopped = false;
  /**
   * Tail of the write queue. Every write is chained onto this promise so that
   * two writes never touch the shared temporary file at the same time.
   */
  private pendingWrite: Promise<void> = Promise.resolve();
  /** True while the latest write attempt failed; used to log only the first failure of a streak. */
  private writeFailing = false;

  /**
   * @param statePath Destination of the state file. The temporary file used
   *   for atomic replacement is `<statePath>.tmp`, so it lives in the same
   *   directory (and therefore on the same filesystem) as the target.
   */
  constructor(statePath = '/tmp/lilith-dev-state.json') {
    this.statePath = statePath;
    this.tmpPath = `${statePath}.tmp`;

    const now = Date.now();
    this.state = {
      version: '1.0.0',
      orchestratorPid: process.pid,
      startedAt: now,
      lastHeartbeat: now,
      phase: {
        index: 0,
        name: 'Initializing',
        startedAt: now,
      },
      progress: {
        servicesTotal: 0,
        servicesStarted: 0,
        servicesFailed: 0,
      },
      children: {},
      healthChecks: {
        docker: {
          lastCheckAt: 0,
          containers: [],
        },
        services: {
          lastCheckAt: 0,
          results: {},
        },
      },
    };
  }

  /**
   * Start the heartbeat: write the state immediately, then every 2s.
   *
   * Safe to call more than once; a previously running timer is replaced
   * rather than leaked.
   */
  start(): void {
    this.stopped = false;

    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer);
    }

    // writeState() handles its own errors and never rejects, so these
    // fire-and-forget calls cannot produce unhandled rejections.
    void this.writeState();

    this.heartbeatTimer = setInterval(() => {
      if (!this.stopped) {
        void this.writeState();
      }
    }, DevStateTracker.HEARTBEAT_INTERVAL_MS);
  }

  /**
   * Stop the heartbeat timer and write the final state.
   *
   * The returned promise resolves once the final write (queued behind any
   * write still in flight) has finished. Write failures after stop are not
   * logged and never reject.
   */
  async stop(): Promise<void> {
    this.stopped = true;

    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer);
      this.heartbeatTimer = null;
    }

    await this.writeState();
  }

  /**
   * Set the current phase; its `startedAt` is stamped with the current time.
   *
   * @param index Position of the phase in the orchestration sequence.
   * @param name Human-readable phase name.
   * @param expectedDuration Optional duration hint stored as-is.
   */
  setPhase(index: number, name: string, expectedDuration?: number): void {
    this.state.phase = {
      index,
      name,
      startedAt: Date.now(),
      expectedDuration,
    };
  }

  /**
   * Merge the given counters into the current progress; omitted fields keep
   * their previous values.
   */
  updateProgress(progress: Partial<DevClusterState['progress']>): void {
    this.state.progress = {
      ...this.state.progress,
      ...progress,
    };
  }

  /**
   * Record the Docker operation currently running, replacing any previous one.
   */
  trackDockerOperation(pid: number | undefined, command: string): void {
    this.state.children.docker = {
      pid,
      command,
      startedAt: Date.now(),
    };
  }

  /**
   * Record the migration process, replacing any previous entry.
   */
  trackMigration(pid: number | undefined): void {
    this.state.children.migrations = {
      pid,
      startedAt: Date.now(),
    };
  }

  /**
   * Record the PM2 daemon and its services, replacing any previous entry.
   *
   * The service entries are copied so later mutation of the caller's array
   * does not silently alter the tracked state.
   */
  trackPM2(daemonPid: number | undefined, services: Array<{ id: string; pid?: number; status: string }>): void {
    this.state.children.pm2 = {
      daemonPid,
      services: services.map((service) => ({ ...service })),
    };
  }

  /**
   * Replace the Docker health snapshot with the given container statuses,
   * keeping only `name`, `health` and `status` of each.
   */
  updateDockerHealth(containers: ContainerStatus[]): void {
    this.state.healthChecks.docker = {
      lastCheckAt: Date.now(),
      containers: containers.map((c) => ({
        name: c.name,
        health: c.health,
        status: c.status,
      })),
    };
  }

  /**
   * Record the latest health-check result for one service.
   */
  updateServiceHealth(serviceId: string, healthy: boolean): void {
    const now = Date.now();
    this.state.healthChecks.services.lastCheckAt = now;
    this.state.healthChecks.services.results[serviceId] = {
      healthy,
      lastCheck: now,
    };
  }

  /**
   * Get a snapshot of the current state (for testing/diagnostics).
   *
   * The snapshot is a deep copy: it does not change when the tracker is
   * updated afterwards, and mutating it does not affect the tracker.
   */
  getState(): Readonly<DevClusterState> {
    return structuredClone(this.state);
  }

  // ---------------------------------------------------------------------------
  // Private Methods
  // ---------------------------------------------------------------------------

  /**
   * Queue a write of the current state to disk.
   *
   * Writes are serialized: each one starts only after the previous one has
   * finished, because they all share the same temporary file. The returned
   * promise resolves when this write has completed and never rejects.
   */
  private writeState(): Promise<void> {
    const queued = this.pendingWrite.then(() => this.writeStateNow());
    this.pendingWrite = queued;
    return queued;
  }

  /**
   * Write state to disk atomically (tmp file + rename).
   *
   * Failures (e.g. disk full, permissions) must not crash the main process,
   * so they are caught here. Only the first failure of a consecutive streak
   * is logged, and nothing is logged after `stop()`.
   */
  private async writeStateNow(): Promise<void> {
    try {
      // Update heartbeat timestamp
      this.state.lastHeartbeat = Date.now();

      await writeFile(this.tmpPath, JSON.stringify(this.state, null, 2), 'utf8');

      // Atomic rename (tmp file is on the same filesystem as the target)
      await rename(this.tmpPath, this.statePath);

      this.writeFailing = false;
    } catch (error) {
      if (this.stopped) return; // Don't log errors after stop
      if (this.writeFailing) return; // Already reported this failure streak

      this.writeFailing = true;
      console.error(`[StateTracker] Write failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}