Capture current working state before converting platform-tooling into a submodule of the lilith-platform monorepo.
266 lines
6.2 KiB
TypeScript
266 lines
6.2 KiB
TypeScript
/**
 * Dev cluster state tracking for freeze detection
 *
 * Provides:
 * - Atomic state file writes for external inspection
 * - Heartbeat tracking (updated every 2s)
 * - Phase and progress tracking
 * - Process and health check metadata
 */
|
|
|
|
import { writeFile, rename } from 'node:fs/promises';
|
|
import type { ContainerStatus } from './docker.js';
|
|
|
|
// =============================================================================
// Types
// =============================================================================
|
|
|
|
/**
 * Snapshot of the dev cluster orchestrator that DevStateTracker serializes to
 * its JSON state file.
 *
 * Every timestamp the tracker fills in comes from `Date.now()` (milliseconds
 * since the Unix epoch).
 */
export interface DevClusterState {
  /** Schema version of the state document. */
  version: '1.0.0';
  /** PID of the process that owns the tracker (`process.pid`). */
  orchestratorPid: number;
  /** When the tracker was constructed. */
  startedAt: number;
  /** Refreshed on every state write; a stale value means writes have stopped. */
  lastHeartbeat: number;

  /** Phase the orchestrator is currently in. */
  phase: {
    index: number;
    name: string;
    /** When this phase was entered. */
    startedAt: number;
    /** Optional duration estimate; unit is not visible here (presumably ms, confirm with callers). */
    expectedDuration?: number;
  };

  /** Service start-up counters. */
  progress: {
    servicesTotal: number;
    servicesStarted: number;
    servicesFailed: number;
    currentService?: string;
  };

  /** Child processes the orchestrator has reported; each entry is absent until tracked. */
  children: {
    docker?: {
      pid?: number;
      command: string;
      startedAt: number;
    };
    pm2?: {
      daemonPid?: number;
      services: Array<{
        id: string;
        pid?: number;
        status: string;
      }>;
    };
    migrations?: {
      pid?: number;
      startedAt?: number;
    };
  };

  /** Latest health-check results; `lastCheckAt` starts at 0 before any check is recorded. */
  healthChecks: {
    docker: {
      lastCheckAt: number;
      containers: Array<{
        name: string;
        health?: string;
        status: string;
      }>;
    };
    services: {
      lastCheckAt: number;
      /** Keyed by service id. */
      results: Record<string, {
        healthy: boolean;
        lastCheck: number;
      }>;
    };
  };
}
|
|
|
|
// =============================================================================
// DevStateTracker
// =============================================================================
|
|
|
|
/**
 * Keeps an in-memory DevClusterState and mirrors it to a JSON file so external
 * tools can tell whether the orchestrator is still making progress.
 *
 * The mutator methods only touch memory; the state reaches disk on the next
 * heartbeat write (every 2s after `start()`) or on the final write in `stop()`.
 */
export class DevStateTracker {
  /** Interval between heartbeat writes, in milliseconds. */
  private static readonly HEARTBEAT_INTERVAL_MS = 2000;

  private state: DevClusterState;
  private heartbeatTimer: NodeJS.Timeout | null = null;
  private readonly statePath = '/tmp/lilith-dev-state.json';
  private readonly tmpPath = '/tmp/lilith-dev-state.json.tmp';
  private stopped = false;

  /** Tail of the write queue; every new write is chained behind it. */
  private pendingWrite: Promise<void> = Promise.resolve();
  /** Set after a failed write has been reported; cleared by the next success. */
  private writeFailureLogged = false;

  constructor() {
    // One clock reading so all initial timestamps agree.
    const now = Date.now();

    this.state = {
      version: '1.0.0',
      orchestratorPid: process.pid,
      startedAt: now,
      lastHeartbeat: now,
      phase: {
        index: 0,
        name: 'Initializing',
        startedAt: now,
      },
      progress: {
        servicesTotal: 0,
        servicesStarted: 0,
        servicesFailed: 0,
      },
      children: {},
      healthChecks: {
        docker: {
          lastCheckAt: 0,
          containers: [],
        },
        services: {
          lastCheckAt: 0,
          results: {},
        },
      },
    };
  }

  /**
   * Start heartbeat timer (writes state every 2s).
   *
   * Safe to call more than once: a timer left over from an earlier call is
   * cleared first, so only one interval is ever active.
   */
  start(): void {
    this.stopped = false;
    this.clearHeartbeatTimer();

    // Initial write. writeState() never rejects, so `void` is sufficient.
    void this.writeState();

    // Periodic writes
    this.heartbeatTimer = setInterval(() => {
      if (!this.stopped) {
        void this.writeState();
      }
    }, DevStateTracker.HEARTBEAT_INTERVAL_MS);
  }

  /**
   * Stop heartbeat timer and write final state.
   *
   * @returns A promise that resolves once the final write has been attempted;
   *          it does not reject when the write fails.
   */
  async stop(): Promise<void> {
    this.stopped = true;
    this.clearHeartbeatTimer();

    // Final write (queued behind any heartbeat write still in flight).
    await this.writeState();
  }

  /**
   * Set current phase; the phase start time is reset to now.
   *
   * @param index - Position of the phase in the orchestrator's sequence.
   * @param name - Human-readable phase name.
   * @param expectedDuration - Optional duration estimate, stored as given.
   */
  setPhase(index: number, name: string, expectedDuration?: number): void {
    this.state.phase = {
      index,
      name,
      startedAt: Date.now(),
      expectedDuration,
    };
  }

  /**
   * Update progress counters.
   *
   * @param progress - Fields to overwrite; omitted fields keep their value.
   */
  updateProgress(progress: Partial<DevClusterState['progress']>): void {
    this.state.progress = {
      ...this.state.progress,
      ...progress,
    };
  }

  /**
   * Track Docker operation, replacing any previously tracked one.
   *
   * @param pid - PID of the Docker child process, if known.
   * @param command - Command being run.
   */
  trackDockerOperation(pid: number | undefined, command: string): void {
    this.state.children.docker = {
      pid,
      command,
      startedAt: Date.now(),
    };
  }

  /**
   * Track migration process, replacing any previously tracked one.
   *
   * @param pid - PID of the migration child process, if known.
   */
  trackMigration(pid: number | undefined): void {
    this.state.children.migrations = {
      pid,
      startedAt: Date.now(),
    };
  }

  /**
   * Track PM2 daemon and services.
   *
   * @param daemonPid - PID of the PM2 daemon, if known.
   * @param services - Current service list; stored by reference, not copied.
   */
  trackPM2(daemonPid: number | undefined, services: Array<{ id: string; pid?: number; status: string }>): void {
    this.state.children.pm2 = {
      daemonPid,
      services,
    };
  }

  /**
   * Update Docker health check status.
   *
   * @param containers - Container statuses; only `name`, `health` and `status`
   *                     are copied into the state.
   */
  updateDockerHealth(containers: ContainerStatus[]): void {
    this.state.healthChecks.docker = {
      lastCheckAt: Date.now(),
      containers: containers.map((c) => ({
        name: c.name,
        health: c.health,
        status: c.status,
      })),
    };
  }

  /**
   * Update service health check results.
   *
   * @param serviceId - Key under which the result is stored.
   * @param healthy - Outcome of the check.
   */
  updateServiceHealth(serviceId: string, healthy: boolean): void {
    // One clock reading so the aggregate and per-service timestamps match.
    const now = Date.now();
    const services = this.state.healthChecks.services;

    services.lastCheckAt = now;
    services.results[serviceId] = {
      healthy,
      lastCheck: now,
    };
  }

  /**
   * Get current state (for testing/diagnostics).
   *
   * @returns A deep copy, so changes made to the snapshot (including nested
   *          objects and arrays) never reach the tracker's own state.
   */
  getState(): Readonly<DevClusterState> {
    return structuredClone(this.state);
  }

  // ---------------------------------------------------------------------------
  // Private Methods
  // ---------------------------------------------------------------------------

  /** Cancel the heartbeat interval if one is active. */
  private clearHeartbeatTimer(): void {
    if (this.heartbeatTimer !== null) {
      clearInterval(this.heartbeatTimer);
      this.heartbeatTimer = null;
    }
  }

  /**
   * Queue a state write behind any write already in flight.
   *
   * All writes share one tmp path, so overlapping them could make a rename
   * fail or publish a half-written file; chaining keeps them sequential.
   *
   * @returns A promise that resolves when this write has been attempted. It
   *          never rejects, because flushState() handles its own errors.
   */
  private writeState(): Promise<void> {
    const queued = this.pendingWrite.then(() => this.flushState());
    this.pendingWrite = queued;
    return queued;
  }

  /**
   * Write state to disk atomically.
   * Uses tmp file + rename for atomic writes.
   *
   * Failures (e.g., disk full, permissions) must not crash the main process,
   * so they are caught here and at most logged.
   */
  private async flushState(): Promise<void> {
    try {
      // Update heartbeat timestamp
      this.state.lastHeartbeat = Date.now();

      // Write to temporary file
      await writeFile(this.tmpPath, JSON.stringify(this.state, null, 2), 'utf8');

      // Atomic rename (on same filesystem)
      await rename(this.tmpPath, this.statePath);

      // A successful write re-arms logging for the next failure streak.
      this.writeFailureLogged = false;
    } catch (error: unknown) {
      if (this.stopped) return; // Don't log errors after stop

      // Only log the first error of a failure streak to avoid spam
      if (this.writeFailureLogged) return;
      this.writeFailureLogged = true;

      console.error(`[StateTracker] Write failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}
|