diff --git a/run/bun.lock b/run/bun.lock index f088ba5..58721ed 100644 --- a/run/bun.lock +++ b/run/bun.lock @@ -6,7 +6,7 @@ "name": "@lilith-platform/run-tooling", "dependencies": { "@lilith/deployment-registry": "1.0.0-dev.1770002235", - "@lilith/service-orchestrator": "1.2.6-dev.1770185550", + "@lilith/service-orchestrator": "1.2.8-dev.1770263447", "@lilith/service-registry": "1.3.2-dev.1769505764", "@lilith/terminal-formatting": "^1.0.0", "@lilith/terminal-reporters": "^1.0.0", @@ -83,7 +83,7 @@ "@lilith/deployment-registry": ["@lilith/deployment-registry@1.0.0-dev.1770002235", "http://localhost:4874/@lilith/deployment-registry/-/deployment-registry-1.0.0-dev.1770002235.tgz", { "dependencies": { "@lilith/service-orchestrator": "*", "@lilith/service-registry": "*", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "yaml": "^2.8.2" } }, "sha512-ra2JVDRL9aE/WDDratob2Px2ADI6s8mJ6mRAsOhU4/RzFijkHLPZKJIkQFICvOX73WNBzV7QJUyOvi6tdL6Nzw=="], - "@lilith/service-orchestrator": ["@lilith/service-orchestrator@1.2.6-dev.1770185550", "http://localhost:4874/@lilith/service-orchestrator/-/service-orchestrator-1.2.6-dev.1770185550.tgz", { "dependencies": { "@lilith/service-registry": "^1.4.0" } }, "sha512-k6Vl1a4Ta8PcjP+IPBNHqw2UesJ2B8ZwTjJrfqiUbxTrgDHh6RkxFfyQNt84XHt8xECSIPf0RetOxI6SS9FJ1A=="], + "@lilith/service-orchestrator": ["@lilith/service-orchestrator@1.2.8-dev.1770263447", "http://localhost:4874/@lilith/service-orchestrator/-/service-orchestrator-1.2.8-dev.1770263447.tgz", { "dependencies": { "@lilith/service-registry": "^1.4.0" } }, "sha512-7B6Vvk9xfOuBHeyDye/VcW4Ij312ebSpJFvCGmYp+juq4y8xKUH/77UlNFjNZCq0ttqHO8rzAGhJCYKgaUdySw=="], "@lilith/service-registry": ["@lilith/service-registry@1.3.2-dev.1769505764", "http://localhost:4874/@lilith/service-registry/-/service-registry-1.3.2-dev.1769505764.tgz", { "dependencies": { "yaml": "^2.8.2" } }, "sha512-xik8tMLs3gFc0u8S3iTUBA30dMEXPaMwkmIbqpcQ2uRW/HbrBHLXKWcorC3RxAsWiYT2pqyBV4abqfOS0aMHNQ=="], diff --git 
a/run/cli/commands/dev/index.ts b/run/cli/commands/dev/index.ts
index 351ed02..4553c04 100644
--- a/run/cli/commands/dev/index.ts
+++ b/run/cli/commands/dev/index.ts
@@ -8,7 +8,7 @@
 export { dev, devAll, devCi, devInfra, devTools } from './start';
 
 // Stop commands
-export { devStop, devReset, devFresh } from './stop';
+export { devStop, devReset, devFresh, devCleanup } from './stop';
 
 // Status commands
 export { devStatus, devWatch } from './status';
diff --git a/run/cli/commands/dev/stop.ts b/run/cli/commands/dev/stop.ts
index 08b6045..4d8d2b2 100644
--- a/run/cli/commands/dev/stop.ts
+++ b/run/cli/commands/dev/stop.ts
@@ -1,16 +1,23 @@
 /**
- * Dev stop commands - devStop, devReset, devFresh
+ * Dev stop commands - devStop, devReset, devFresh, devCleanup
  */
 
+import { exec } from 'node:child_process';
+import { promisify } from 'node:util';
+import { readdir, readFile, readlink, unlink } from 'node:fs/promises';
 import { DockerOps } from '../../../core/docker';
 import { ServiceManager } from '../../../core/services';
 import { ShutdownOrchestrator } from '../../../core/shutdown-orchestrator';
 import { ShutdownDisplay } from './@core/shutdown-display';
 import { Logger } from '../../../utils/logger';
 import { loadConfig } from '../../../utils/config';
+import { colors } from '../../../utils/colors';
+import { PATHS } from '../../../../configs/paths';
 import type { CommandContext, CommandResult } from '../@core';
 import { dev } from './start';
 
+const execAsync = promisify(exec);
+
 const logger = new Logger({ context: 'Dev' });
 const docker = new DockerOps(logger);
 const services = new ServiceManager(logger);
@@ -87,3 +94,186 @@ export async function devFresh(ctx: CommandContext): Promise {
     return { code: 1, error: String(err) };
   }
 }
+
+/**
+ * Command-line patterns, interpreted by `pgrep -f` as regular expressions,
+ * that identify dev processes eligible for cleanup.
+ */
+const DEV_PROCESS_PATTERNS = [
+  'nest.js start --watch',
+  'nest start --watch',
+  'vite.*--host',
+  'astro dev',
+  'bun run start:dev',
+];
+
+/** Milliseconds to wait after SIGTERM before escalating to SIGKILL. */
+const CLEANUP_GRACE_MS = 2000;
+
+/**
+ * Report whether a process with the given PID exists.
+ *
+ * `process.kill(pid, 0)` throws ESRCH when there is no such process. Any other
+ * failure (for example EPERM) means the process exists but cannot be signalled
+ * from here, so it is reported as alive.
+ */
+function isProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    return !(err instanceof Error && 'code' in err && err.code === 'ESRCH');
+  }
+}
+
+/**
+ * Report whether the working directory of `pid` is the project root or a
+ * directory below it.
+ *
+ * Reads the `/proc/<pid>/cwd` link; when that link cannot be read the process
+ * is treated as not ours.
+ */
+async function isProjectProcess(pid: number): Promise<boolean> {
+  // Trailing slashes are dropped so the prefix test works for either spelling of the root
+  const root = PATHS.root.replace(/\/+$/, '');
+  try {
+    const cwd = await readlink(`/proc/${pid}/cwd`);
+    return cwd === root || cwd.startsWith(`${root}/`);
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Kill orphan dev processes by pattern
+ *
+ * Signals every process whose command line matches one of
+ * DEV_PROCESS_PATTERNS and whose working directory is inside the project,
+ * regardless of PID file state: SIGTERM first, then SIGKILL for anything still
+ * alive after CLEANUP_GRACE_MS. Afterwards removes PID files under PATHS.pids
+ * that are unreadable or name a process that no longer exists. Use when the
+ * cluster gets into a bad state with orphan processes blocking ports.
+ *
+ * @param _ctx - Command context (unused)
+ * @returns Always `{ code: 0 }`; per-pattern failures are logged as warnings
+ */
+export async function devCleanup(_ctx: CommandContext): Promise<CommandResult> {
+  logger.header('Cleaning Up Orphan Dev Processes');
+  logger.blank();
+
+  // A process can match several patterns; the set keeps it from being
+  // signalled and counted more than once.
+  const signalled = new Set<number>();
+  const errors: string[] = [];
+
+  // Kill processes by pattern
+  for (const pattern of DEV_PROCESS_PATTERNS) {
+    try {
+      // pgrep exits non-zero when nothing matches; `|| true` keeps exec from rejecting
+      const { stdout } = await execAsync(
+        `pgrep -f "${pattern}" 2>/dev/null || true`,
+      );
+      const candidates = stdout
+        .split('\n')
+        .map((line) => parseInt(line, 10))
+        .filter((pid) => pid > 0 && pid !== process.pid && !signalled.has(pid));
+
+      // Filter to only processes in our project directory
+      const toKill: number[] = [];
+      for (const pid of candidates) {
+        if (await isProjectProcess(pid)) toKill.push(pid);
+      }
+      if (toKill.length === 0) continue;
+
+      logger.info(`Found ${toKill.length} process(es) matching "${colors.accent(pattern)}"`);
+
+      // Send SIGTERM
+      for (const pid of toKill) {
+        try {
+          process.kill(pid, 'SIGTERM');
+          signalled.add(pid);
+        } catch {
+          // Process may have exited
+        }
+      }
+    } catch (err) {
+      errors.push(`Pattern "${pattern}" failed: ${err}`);
+    }
+  }
+
+  // Wait for graceful shutdown, then escalate for anything that ignored SIGTERM
+  if (signalled.size > 0) {
+    logger.info('Waiting for processes to exit...');
+    await new Promise((resolve) => setTimeout(resolve, CLEANUP_GRACE_MS));
+
+    for (const pid of signalled) {
+      if (!isProcessAlive(pid)) continue;
+      try {
+        process.kill(pid, 'SIGKILL');
+      } catch {
+        // Exited in the meantime, or we are not allowed to signal it
+      }
+    }
+  }
+
+  // Clean up stale PID files
+  logger.blank();
+  logger.info('Cleaning up stale PID files...');
+
+  let pidFiles: string[] = [];
+  try {
+    pidFiles = await readdir(PATHS.pids);
+  } catch {
+    // PID directory doesn't exist or can't be listed
+  }
+
+  let pidsCleaned = 0;
+  for (const file of pidFiles) {
+    if (!file.endsWith('.pid')) continue;
+    const pidPath = `${PATHS.pids}/${file}`;
+
+    let stale: boolean;
+    try {
+      const pid = parseInt((await readFile(pidPath, 'utf8')).trim(), 10);
+      // Files without a usable PID, or whose process still exists, are kept
+      stale = pid > 0 && !isProcessAlive(pid);
+    } catch {
+      // File unreadable, try to remove it
+      stale = true;
+    }
+    if (!stale) continue;
+
+    try {
+      await unlink(pidPath);
+      pidsCleaned++;
+    } catch {
+      // Already gone or not removable; ignore
+    }
+  }
+
+  // Summary
+  const totalKilled = signalled.size;
+  logger.blank();
+  if (totalKilled > 0 || pidsCleaned > 0) {
+    logger.success(`Cleanup complete:`);
+    if (totalKilled > 0) {
+      logger.info(` ${colors.healthy('●')} Killed ${totalKilled} orphan process(es)`);
+    }
+    if (pidsCleaned > 0) {
+      logger.info(` ${colors.healthy('●')} Removed ${pidsCleaned} stale PID file(s)`);
+    }
+  } else {
+    logger.info('No orphan processes or stale PID files found');
+  }
+
+  if (errors.length > 0) {
+    logger.blank();
+    logger.warn('Some patterns had errors:');
+    for (const err of errors) {
+      logger.warn(` ${err}`);
+    }
+  }
+
+  logger.blank();
+  return { code: 0 };
+}
diff --git a/run/cli/index.ts b/run/cli/index.ts
index a9955ba..8e3b0f3 100644
--- a/run/cli/index.ts
+++ b/run/cli/index.ts
@@ -41,6 +41,7 @@ const lazyCommands: Record = {
   'dev:all': ['./commands/dev/index', 'devAll'],
   'dev:tools': ['./commands/dev/index', 'devTools'],
   'dev:stop': ['./commands/dev/index', 'devStop'],
+  'dev:cleanup': ['./commands/dev/index', 'devCleanup'],
   'dev:status': ['./commands/dev/index', 'devStatus'],
   'dev:watch': ['./commands/dev/index', 'devWatch'],
   'dev:logs': ['./commands/dev/index', 'devLogs'],
@@ -156,6 +157,7 @@ ${colors.accent('Development Commands:')}
 dev:infra Start Docker infrastructure only (databases, caches)
 dev:all Start extended cluster (alias for: dev extended)
 dev:stop Stop all dev containers
+dev:cleanup Kill orphan dev processes by pattern (emergency cleanup)
 dev:status Show status of all dev containers
 dev:watch [n] Live status monitor (refresh every n seconds,
Ctrl+C to exit)
 dev:logs [svc] View container logs (all or specific service)
diff --git a/run/core/deployment-orchestrator.ts b/run/core/deployment-orchestrator.ts
index 2adbbbe..f9ab138 100644
--- a/run/core/deployment-orchestrator.ts
+++ b/run/core/deployment-orchestrator.ts
@@ -5,6 +5,8 @@
  * Replaces duplicated startCluster() and startDomain() logic.
  */
 
+import { exec } from 'node:child_process';
+import { promisify } from 'node:util';
 import { DeploymentRegistry } from '@lilith/deployment-registry';
 import type { DeploymentManifest } from '@lilith/deployment-registry';
 import { buildDeploymentRegistry, type ServiceRegistry } from '@lilith/service-registry';
@@ -20,9 +22,22 @@ import { prepareDevEnvironment, waitForHealthy, keepAlive, formatDuration } from
 import { getOurServices, readPidFile, isPidRunning, getPortPid, waitForPortRelease } from '@lilith/service-orchestrator';
 import { ShutdownOrchestrator } from './shutdown-orchestrator';
 import { DOCKER_ONLY_TYPES } from './services';
-import { REGISTRY_PATHS } from '../../configs/paths';
+import { REGISTRY_PATHS, PATHS } from '../../configs/paths';
 import readline from 'node:readline';
 
+const execAsync = promisify(exec);
+
+/**
+ * Patterns to match dev processes for cleanup
+ */
+const DEV_PROCESS_PATTERNS = [
+  'nest.js start --watch',
+  'nest start --watch',
+  'vite.*--host',
+  'astro dev',
+  'bun run start:dev',
+];
+
 export interface DeploymentOrchestratorOptions {
   deploymentName: string;
   environment?: Environment;
@@ -126,8 +141,11 @@ export class DeploymentOrchestrator {
       host: this.config.host,
     });
 
-    // Kill orphaned processes on expected ports (from manual starts, Claude Code sessions, etc.)
+    // Kill orphaned processes (pattern-based first, then port-based)
+    // Pattern-based catches orphans from different sessions or manual starts
+    // Port-based catches any remaining orphans on expected ports
     if (this.environment === 'dev') {
+      await this.cleanupOrphansByPattern();
       await this.cleanupOrphanedPorts();
     }
 
@@ -597,6 +615,81 @@ export class DeploymentOrchestrator {
     return [...coreServices, ...addonServices];
   }
 
+  /**
+   * Kill orphan dev processes by matching command patterns.
+   *
+   * This catches orphans that the port-based cleanup might miss, such as:
+   * - Processes from previous sessions with different port configs
+   * - Processes started manually outside the orchestrator
+   * - Child processes of services we didn't spawn
+   *
+   * Only signals processes whose cwd (as reported by /proc/<pid>/cwd) is
+   * PATHS.root or a directory below it. Each match gets SIGTERM, then SIGKILL
+   * if it is still alive after 1.5 seconds. Failures are ignored (best effort).
+   */
+  private async cleanupOrphansByPattern(): Promise<void> {
+    // Trailing slashes are dropped so the prefix test works for either spelling of the root
+    const root = PATHS.root.replace(/\/+$/, '');
+    // A set, because one process can match more than one pattern
+    const killed = new Set<number>();
+
+    for (const pattern of DEV_PROCESS_PATTERNS) {
+      try {
+        const { stdout } = await execAsync(
+          `pgrep -f "${pattern}" 2>/dev/null || true`,
+        );
+        const pids = stdout
+          .split('\n')
+          .map((line) => parseInt(line, 10))
+          .filter((pid) => pid > 0 && pid !== process.pid && !killed.has(pid));
+
+        for (const pid of pids) {
+          // Verify process is in our project directory
+          let cwd: string;
+          try {
+            const result = await execAsync(
+              `readlink /proc/${pid}/cwd 2>/dev/null || true`,
+            );
+            cwd = result.stdout.trim();
+          } catch {
+            continue; // Can't read cwd, skip
+          }
+          const inProject = cwd !== '' && (cwd === root || cwd.startsWith(`${root}/`));
+          if (!inProject) continue; // Not our project (or no cwd reported), skip
+
+          // Kill the process
+          try {
+            process.kill(pid, 'SIGTERM');
+            killed.add(pid);
+          } catch {
+            // Process may have exited
+          }
+        }
+      } catch {
+        // Pattern search failed, continue
+      }
+    }
+
+    if (killed.size === 0) return;
+
+    this.logger.info(`Found ${killed.size} orphaned processes, cleaning up...`);
+
+    // Wait for graceful shutdown
+    await new Promise((resolve) => setTimeout(resolve, 1500));
+
+    // Force kill any still running
+    for (const pid of killed) {
+      try {
+        process.kill(pid, 0); // Throws when the process is already gone
+        process.kill(pid, 'SIGKILL');
+      } catch {
+        // Already dead
+      }
+    }
+
+    this.logger.info(`Cleaned up ${killed.size} orphaned processes`);
+  }
+
   /**
    * Kill orphaned processes occupying expected ports.
    *