Infrastructure is now a sibling to codebase, not inside it.
Updated all relative paths accordingly:
- provisioning/reconcile.mjs: ../codebase/ instead of ../../codebase/
- reconciliation/services/*.sh: ${RECONCILE_ROOT}/../../codebase
- scripts/check-hosts: Fixed symlink to ../hosts/check-hosts
- scripts/*.sh: Updated path comments and references
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1063 lines
36 KiB
JavaScript
Executable file
1063 lines
36 KiB
JavaScript
Executable file
#!/usr/bin/env node
|
|
/**
 * reconcile.mjs - Infrastructure Reconciliation Engine
 *
 * Compares current host state to desired YAML inventory and applies fixes.
 * Supports multiple features: hostname, packages, services, firewall, vpn, agent.
 *
 * Flow: Probe → Snapshot → Apply → Verify → Commit/Rollback
 *
 * Usage:
 *   node reconcile.mjs                     # Dry-run: show what needs fixing
 *   node reconcile.mjs --apply             # Apply all fixes
 *   node reconcile.mjs --host apricot      # Only reconcile specific host
 *   node reconcile.mjs --feature hostname  # Only reconcile specific feature
 *   node reconcile.mjs --verify-only       # Re-verify without applying
 *   node reconcile.mjs --auto-rollback     # Automatic rollback on failure
 *   node reconcile.mjs --no-rollback       # Log failures, no rollback
 *
 * Part of: lilith-platform infrastructure
 */
|
|
|
|
import { readFileSync, writeFileSync, readdirSync, existsSync } from 'fs';
|
|
import { join, dirname } from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
import { execFileSync, spawnSync } from 'child_process';
|
|
import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
|
|
|
|
// Verification system imports
|
|
import { hashFeatureState, hashTransactionState } from './lib/state-hasher.mjs';
|
|
import { SnapshotManager } from './lib/snapshot-manager.mjs';
|
|
import { TransactionManager } from './lib/transaction-manager.mjs';
|
|
import { VerificationEngine } from './lib/verification-engine.mjs';
|
|
import { RollbackExecutor } from './lib/rollback-executor.mjs';
|
|
|
|
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Host inventory directory, resolved relative to this script.
const INVENTORY_PATH = join(__dirname, '../hosts');

// ANSI escape sequences used to colorize console output.
const RED = '\x1b[0;31m';
const GREEN = '\x1b[0;32m';
const YELLOW = '\x1b[1;33m';
const BLUE = '\x1b[0;34m';
const CYAN = '\x1b[0;36m';
const MAGENTA = '\x1b[0;35m';
const DIM = '\x1b[2m';
// Reset sequence ("no color"); emitted after every colored span.
const NC = '\x1b[0m';
|
|
|
|
// Parse CLI args
const args = process.argv.slice(2);

/**
 * Return the value that follows `flag` on the command line.
 *
 * Exits with an error when the flag is present but its value is missing or
 * looks like another flag. Without this check `--apply --host` would leave the
 * host filter undefined and reconcile every host in the inventory.
 *
 * @param {string} flag - Option name, e.g. '--host'.
 * @returns {string|null} The option value, or null when the flag is absent.
 */
function cliOptionValue(flag) {
  const position = args.indexOf(flag);
  if (position === -1) return null;

  const value = args[position + 1];
  if (value === undefined || value.startsWith('-')) {
    console.error(`Error: ${flag} requires a value`);
    process.exit(1);
  }
  return value;
}

const APPLY_MODE = args.includes('--apply') || args.includes('--auto');
const ALL_HOSTS = args.includes('--all'); // Explicit "all hosts" flag
const HOST_FILTER = cliOptionValue('--host');
const FEATURE_FILTER = cliOptionValue('--feature');
const VERBOSE = args.includes('--verbose') || args.includes('-v');
const SHOW_HELP = args.includes('--help') || args.includes('-h');

// Verification CLI flags
const AUTO_ROLLBACK = args.includes('--auto-rollback');
const NO_ROLLBACK = args.includes('--no-rollback');
const VERIFY_ONLY = args.includes('--verify-only');
const LIST_SNAPSHOTS = args.includes('--list-snapshots');
const SHOW_SNAPSHOT = cliOptionValue('--show-snapshot');
|
|
|
|
// Validate: reject flag pairs that contradict each other before doing any work.
// Each entry is [both flags were given, message to print]; the first conflict
// found is reported and the process exits with status 1.
const conflictingFlags = [
  [ALL_HOSTS && HOST_FILTER, 'Error: --all and --host are mutually exclusive'],
  [AUTO_ROLLBACK && NO_ROLLBACK, 'Error: --auto-rollback and --no-rollback are mutually exclusive'],
];

for (const [inConflict, message] of conflictingFlags) {
  if (inConflict) {
    console.error(message);
    process.exit(1);
  }
}
|
|
|
|
// Initialize verification system.
// Both managers are rooted at the inventory directory; the transaction manager
// is handed the snapshot manager so the two share one snapshot store.
const snapshotManager = new SnapshotManager(INVENTORY_PATH);
const transactionManager = new TransactionManager(snapshotManager, INVENTORY_PATH);
|
|
|
|
// Show help and exit before any host is touched.
// The flow section lists PROBE before SNAPSHOT because that is the order the
// engine runs them in: hosts are probed first and the probe results are stored
// in the snapshot, which is why VERIFY (a re-probe) mirrors the first step.
if (SHOW_HELP) {
  console.log(`
Infrastructure Reconciliation Engine

Usage:
  node reconcile.mjs [options]

Options:
  --auto, --apply       Apply fixes automatically (default: dry-run)
  --all                 Run on all hosts (default when no --host specified)
  --host <name>         Only reconcile specific host
  --feature <name>      Only reconcile specific feature
  --verbose, -v         Show detailed output
  --help, -h            Show this help

Verification Options:
  --verify-only         Re-verify current state without applying changes
  --auto-rollback       Automatically rollback on verification failure
  --no-rollback         Log verification failures but don't rollback
  --list-snapshots      List available snapshots for a host (requires --host)
  --show-snapshot <ts>  Show snapshot details by timestamp (requires --host)

Examples:
  node reconcile.mjs                                        # Dry-run all hosts
  node reconcile.mjs --all --auto                           # Apply all fixes to all hosts
  node reconcile.mjs --host apricot                         # Dry-run specific host
  node reconcile.mjs --feature agent --auto                 # Apply agent fixes only
  node reconcile.mjs --host apricot --auto --auto-rollback  # Apply with rollback
  node reconcile.mjs --host apricot --list-snapshots        # List snapshots
  node reconcile.mjs --host apricot --verify-only           # Re-verify state

Features:
  hostname, packages, services, firewall, vpn, agent

Verification Flow:
  1. PROBE     - Detect current state
  2. SNAPSHOT  - Capture pre-reconciliation state
  3. COMPARE   - Find drift
  4. APPLY     - Make changes
  5. VERIFY    - Re-probe and validate (first step = last step)
  6. COMMIT    - Finalize on success
     ROLLBACK  - Restore snapshot on failure
`);
  process.exit(0);
}
|
|
|
|
// Handle --list-snapshots: print one line per stored snapshot of the selected
// host (timestamp, status, creation time), then exit without reconciling.
if (LIST_SNAPSHOTS) {
  if (!HOST_FILTER) {
    console.error('Error: --list-snapshots requires --host <name>');
    process.exit(1);
  }

  const hostSnapshots = snapshotManager.listSnapshots(HOST_FILTER);

  if (hostSnapshots.length > 0) {
    // success → green, failed → red, any other status → yellow
    const colorByStatus = new Map([
      ['success', GREEN],
      ['failed', RED],
    ]);

    console.log(`${CYAN}Snapshots for ${HOST_FILTER}:${NC}`);
    hostSnapshots.forEach((entry) => {
      const statusColor = colorByStatus.get(entry.status) ?? YELLOW;
      console.log(` ${DIM}${entry.timestamp}${NC} ${statusColor}${entry.status}${NC} ${entry.createdAt}`);
    });
  } else {
    console.log(`No snapshots found for host: ${HOST_FILTER}`);
  }

  process.exit(0);
}
|
|
|
|
// Handle --show-snapshot: dump one snapshot of the selected host (metadata as
// JSON, host data as YAML), then exit without reconciling.
if (SHOW_SNAPSHOT) {
  if (!HOST_FILTER) {
    console.error('Error: --show-snapshot requires --host <name>');
    process.exit(1);
  }

  // The CLI value is parsed as a base-10 integer before the lookup.
  const requestedTimestamp = parseInt(SHOW_SNAPSHOT, 10);
  const snapshot = snapshotManager.getSnapshot(HOST_FILTER, requestedTimestamp);

  if (!snapshot) {
    console.error(`Snapshot not found: ${SHOW_SNAPSHOT} for host ${HOST_FILTER}`);
    process.exit(1);
  }

  const report = [
    `${CYAN}Snapshot ${SHOW_SNAPSHOT} for ${HOST_FILTER}:${NC}`,
    `${DIM}Metadata:${NC}`,
    JSON.stringify(snapshot.metadata, null, 2),
    `${DIM}Host Data:${NC}`,
    stringifyYaml(snapshot.hostData),
  ];
  for (const section of report) {
    console.log(section);
  }

  process.exit(0);
}
|
|
|
|
// Statistics: run-wide counters, all starting at zero. They are incremented
// while hosts are processed (e.g. stats.hosts++, stats.rolledBack++).
const stats = Object.fromEntries(
  [
    'hosts',
    'features',
    'ok',
    'drift',
    'fixed',
    'failed',
    'skipped',
    'unreachable',
    'verified',
    'verifyFailed',
    'rolledBack',
  ].map((counter) => [counter, 0]),
);
|
|
|
|
/**
 * Resolve a vault reference to a filesystem path.
 *
 * `vault://ssh-keys/<name>` maps to `$HOME/.ssh/<name>`; any other non-empty
 * reference is returned unchanged, and an empty/missing reference yields ''.
 *
 * @param {string} [ref] - Vault reference or plain path.
 * @returns {string} Resolved path, or '' when no reference was given.
 */
function resolveVaultRef(ref) {
  const sshKeyPrefix = 'vault://ssh-keys/';

  if (!ref) {
    return '';
  }
  if (!ref.startsWith(sshKeyPrefix)) {
    return ref;
  }

  const keyName = ref.replace(sshKeyPrefix, '');
  return `${process.env.HOME}/.ssh/${keyName}`;
}
|
|
|
|
/**
 * Tell whether commands on this host must be elevated with sudo.
 *
 * The SSH user defaults to 'root' when the host entry does not name one, so
 * only an explicitly configured non-root user requires sudo.
 *
 * @param {object} host - Parsed host inventory entry.
 * @returns {boolean} true when the SSH user is not root.
 */
function needsSudo(host) {
  return (host.ssh?.user || 'root') !== 'root';
}
|
|
|
|
/**
 * Wrap a shell command in `sudo bash -c '…'` when the host's SSH user is not
 * root; otherwise return the command unchanged.
 *
 * @param {object} host - Parsed host inventory entry.
 * @param {string} command - Shell command to run.
 * @returns {string} The command, elevated if required.
 */
function wrapSudo(host, command) {
  if (!needsSudo(host)) {
    return command;
  }

  // Close the quote, emit an escaped quote, reopen: ' becomes '\''
  const quotedBody = command.replace(/'/g, "'\\''");
  return `sudo bash -c '${quotedBody}'`;
}
|
|
|
|
/**
 * SSH execution helper.
 *
 * Runs `command` on the host synchronously: through a local `bash -c` when the
 * resolved address is 'localhost', otherwise through `ssh`. The command is
 * wrapped with sudo for non-root SSH users unless `options.noSudo` is set.
 *
 * @param {object} host - Parsed host inventory entry (reads `host.ssh`).
 * @param {string} command - Shell command to execute.
 * @param {object} [options]
 * @param {boolean} [options.noSudo] - Run the command without the sudo wrapper.
 * @param {number} [options.timeout] - Timeout in milliseconds (default 30000).
 * @returns {{success: boolean, stdout: string, stderr: string, code: (number|null), error?: string}}
 *   `success` is true only for exit status 0. `error` is set when the process
 *   could not be run to completion (spawn failure, timeout, invalid arguments,
 *   or no address configured). `stdout`/`stderr` are always strings.
 */
function sshExec(host, command, options = {}) {
  const { sshHost, sshUser, sshPort = 22 } = host.ssh || {};
  const resolvedHost = sshHost || host.ssh?.ip || host.ssh?.host;
  const resolvedUser = sshUser || host.ssh?.user || 'root';
  const resolvedKey = resolveVaultRef(host.ssh?.keyRef);

  // Without an address ssh would be invoked for the literal host "undefined".
  if (!resolvedHost) {
    return { success: false, stdout: '', stderr: '', code: null, error: 'No SSH host configured' };
  }

  // Wrap command in sudo if not root
  const finalCommand = options.noSudo ? command : wrapSudo(host, command);

  let program = 'bash';
  let programArgs = ['-c', finalCommand];

  if (resolvedHost !== 'localhost') {
    // NOTE(review): StrictHostKeyChecking=no accepts unknown or changed host
    // keys without verification; kept for compatibility with existing hosts.
    const sshArgs = [
      '-o', 'ConnectTimeout=10',
      '-o', 'StrictHostKeyChecking=no',
      '-o', 'BatchMode=yes',
    ];

    // Only pass the key when it exists locally; otherwise ssh falls back to
    // its default identities.
    if (resolvedKey && existsSync(resolvedKey)) {
      sshArgs.push('-i', resolvedKey);
    }
    if (sshPort !== 22) {
      sshArgs.push('-p', String(sshPort));
    }
    sshArgs.push(`${resolvedUser}@${resolvedHost}`, finalCommand);

    program = 'ssh';
    programArgs = sshArgs;
  }

  try {
    const result = spawnSync(program, programArgs, {
      encoding: 'utf-8',
      timeout: options.timeout || 30000,
      stdio: ['pipe', 'pipe', 'pipe'],
    });

    const outcome = {
      success: result.status === 0,
      stdout: result.stdout?.trim() || '',
      stderr: result.stderr?.trim() || '',
      code: result.status,
    };
    // spawnSync reports timeouts and spawn failures via `result.error` instead
    // of throwing; surface the reason rather than a bare failure.
    if (result.error) {
      outcome.error = result.error.message;
    }
    return outcome;
  } catch (err) {
    // Reached when spawnSync rejects its arguments (e.g. a non-string command).
    return { success: false, stdout: '', stderr: '', code: null, error: err.message };
  }
}
|
|
|
|
/**
 * Feature Probes - Detect current state
 *
 * One async probe per feature. Each takes the parsed host entry and returns a
 * plain object describing the current state, or `{ error }` when the state
 * could not be queried. All probes run without the sudo wrapper.
 */
const probes = {
  // Short and fully-qualified hostname, fetched in one round trip as "short|fqdn".
  hostname: async (host) => {
    const result = sshExec(host, 'echo "$(hostname -s)|$(hostname -f 2>/dev/null || hostname)"', { noSudo: true });
    if (!result.success) return { error: 'SSH failed' };

    const [short, fqdn] = result.stdout.split('|');
    return { short, fqdn };
  },

  // Names of all installed packages (dpkg on debian, rpm otherwise).
  packages: async (host) => {
    const osFamily = host.os?.family;

    if (osFamily === 'atomic') {
      return { installed: [], missing: [], forbidden: [], note: 'Immutable OS - packages managed via rpm-ostree' };
    }

    const cmd = osFamily === 'debian'
      ? "dpkg-query -W -f='${Package}\\n' 2>/dev/null"
      : "rpm -qa --queryformat '%{NAME}\\n' 2>/dev/null";

    const result = sshExec(host, cmd, { timeout: 60000, noSudo: true });
    if (!result.success) return { error: 'Failed to query packages' };

    const installed = result.stdout.split('\n').filter(Boolean);
    return { installed };
  },

  // Which of the services that SHOULD be enabled actually are.
  services: async (host) => {
    // Get expected services to check (templated services like wg-quick@wg0 don't appear in list-unit-files)
    const expectedServices = host.features?.services?.state?.enabled || [];
    const enabled = [];

    // Check each expected service individually
    for (const svc of expectedServices) {
      const result = sshExec(host, `systemctl is-enabled ${svc} 2>/dev/null || echo "disabled"`, { noSudo: true });
      if (result.success && result.stdout.trim() === 'enabled') {
        enabled.push(svc);
      }
    }

    return { enabled };
  },

  // Raw status text of the configured firewall type (ufw or firewalld).
  firewall: async (host) => {
    const firewallType = host.features?.firewall?.state?.type;

    let statusCommand;
    if (firewallType === 'ufw') {
      statusCommand = 'ufw status verbose 2>/dev/null || echo "inactive"';
    } else if (firewallType === 'firewalld') {
      statusCommand = 'firewall-cmd --state 2>/dev/null && firewall-cmd --list-all 2>/dev/null || echo "inactive"';
    } else {
      return { type: 'unknown', status: 'not configured' };
    }

    const result = sshExec(host, statusCommand, { noSudo: true });
    // The command itself always exits 0 (it falls back to echo), so a failure
    // here means the host could not be queried. Report it like the other
    // probes instead of returning an empty status that reads as healthy.
    if (!result.success) return { error: 'Failed to query firewall' };

    return { type: firewallType, status: result.stdout };
  },

  // Whether `wg show` reports anything.
  vpn: async (host) => {
    const result = sshExec(host, 'wg show 2>/dev/null || echo "not configured"', { noSudo: true });
    if (!result.success) return { error: 'Failed to query WireGuard' };

    const hasInterface = !result.stdout.includes('not configured') && result.stdout.length > 0;
    return { configured: hasInterface, status: result.stdout };
  },

  // State of the monitoring agent. The current service (host-status-monitor)
  // is checked before the legacy one (host-agent); `legacy: true` marks hosts
  // that still have only the old service.
  agent: async (host) => {
    const isActive = (unit) => {
      const result = sshExec(host, `systemctl is-active ${unit} 2>/dev/null`, { noSudo: true });
      return result.success && result.stdout === 'active';
    };
    const unitFileExists = (unit) => {
      const result = sshExec(host, `systemctl list-unit-files ${unit}.service 2>/dev/null | grep -q ${unit} && echo yes || echo no`, { noSudo: true });
      return result.stdout === 'yes';
    };

    // Check new service first
    if (isActive('host-status-monitor')) {
      return { running: true, status: 'active', legacy: false };
    }

    // Check for new service file existence
    if (unitFileExists('host-status-monitor')) {
      return { running: false, status: 'inactive', legacy: false };
    }

    // Check for legacy service
    if (isActive('host-agent')) {
      return { running: true, status: 'legacy-active', legacy: true };
    }
    if (unitFileExists('host-agent')) {
      return { running: false, status: 'legacy-inactive', legacy: true };
    }

    return { running: false, status: 'not-installed', legacy: false };
  },
};
|
|
|
|
/**
 * Feature Appliers - Apply desired state
 *
 * One async applier per feature, called as `applier(host, desired, current)`
 * where `desired` is the feature's inventory config and `current` is the probe
 * result. Each returns `{ success, ... }`; on failure an `error` string is
 * included so the caller can report why.
 */

/** Single-quote a value so it can be embedded in a POSIX shell command. */
function quoteForShell(value) {
  return `'${String(value).replace(/'/g, "'\\''")}'`;
}

/** Resolve address, user, key and port for a host the same way sshExec does. */
function applierSshTarget(host) {
  const { sshHost, sshUser, sshPort = 22 } = host.ssh || {};
  return {
    hostname: sshHost || host.ssh?.ip || host.ssh?.host,
    user: sshUser || host.ssh?.user || 'root',
    key: resolveVaultRef(host.ssh?.keyRef),
    port: sshPort,
  };
}

/** Build the ssh argv: `-o` options, identity, port, destination, remote command. */
function applierSshArgs(target, sshOptions, remoteCommand) {
  const sshArgs = sshOptions.flatMap((option) => ['-o', option]);
  if (target.key && existsSync(target.key)) {
    sshArgs.push('-i', target.key);
  }
  if (target.port !== 22) {
    sshArgs.push('-p', String(target.port));
  }
  sshArgs.push(`${target.user}@${target.hostname}`, remoteCommand);
  return sshArgs;
}

/** Best available failure reason from a spawnSync or sshExec result. */
function describeFailure(result) {
  const spawnError = typeof result.error === 'string' ? result.error : result.error?.message;
  const stderrText = typeof result.stderr === 'string' ? result.stderr.trim() : '';
  return spawnError || stderrText || `exit code ${result.status ?? result.code}`;
}

const appliers = {
  // Pipe set-hostname.sh to the remote bash with (short, fqdn, method) as arguments.
  hostname: async (host, desired, current) => {
    const method = desired.state?.method || host.hostnameMethod;
    const shortName = desired.state?.short || host.hostname;
    const fqdn = desired.state?.fqdn || host.fqdn;

    // Read the set-hostname.sh script
    const setHostnameScript = join(__dirname, 'set-hostname.sh');
    if (!existsSync(setHostnameScript)) {
      return { success: false, error: 'set-hostname.sh not found' };
    }
    const script = readFileSync(setHostnameScript, 'utf-8');

    const target = applierSshTarget(host);
    if (target.hostname === 'localhost') {
      console.log(` ${YELLOW}Skipping localhost - run manually with sudo${NC}`);
      return { success: false, error: 'localhost requires manual sudo' };
    }

    // Use sudo for non-root users. Arguments are shell-quoted so values that
    // contain a quote cannot break out of the command line.
    const scriptArgs = [shortName, fqdn, method].map(quoteForShell).join(' ');
    const bashCmd = `${needsSudo(host) ? 'sudo ' : ''}bash -s -- ${scriptArgs}`;

    const result = spawnSync('ssh', applierSshArgs(target, ['StrictHostKeyChecking=no'], bashCmd), {
      input: script,
      encoding: 'utf-8',
      timeout: 30000,
    });

    if (VERBOSE) {
      console.log(` ${DIM}${result.stdout}${NC}`);
    }

    if (result.status !== 0) {
      return { success: false, output: result.stdout, error: describeFailure(result) };
    }
    return { success: true, output: result.stdout };
  },

  // Install required packages that are missing and remove forbidden ones that
  // are installed (apt-get on debian, dnf otherwise).
  packages: async (host, desired, current) => {
    if (host.os?.family === 'atomic') {
      return { success: true, note: 'Skipped - immutable OS' };
    }

    const required = desired.state?.required || [];
    const forbidden = desired.state?.forbidden || [];
    const installed = new Set(current.installed || []);

    const missing = required.filter((pkg) => !installed.has(pkg));
    const toRemove = forbidden.filter((pkg) => installed.has(pkg));

    if (missing.length === 0 && toRemove.length === 0) {
      return { success: true, note: 'No package changes needed' };
    }

    const [installCmd, removeCmd] = host.os?.family === 'debian'
      ? ['DEBIAN_FRONTEND=noninteractive apt-get install -y', 'apt-get remove -y']
      : ['dnf install -y', 'dnf remove -y'];

    const steps = [];
    if (missing.length > 0) steps.push(`${installCmd} ${missing.join(' ')}`);
    if (toRemove.length > 0) steps.push(`${removeCmd} ${toRemove.join(' ')}`);

    const result = sshExec(host, steps.join(' && '), { timeout: 300000 });
    const outcome = { success: result.success, output: result.stdout, missing, removed: toRemove };
    if (!result.success) outcome.error = describeFailure(result);
    return outcome;
  },

  // Enable (and start) services that should be enabled; disable (and stop)
  // services that should be disabled and are currently reported as enabled.
  services: async (host, desired, current) => {
    const shouldBeEnabled = desired.state?.enabled || [];
    const shouldBeDisabled = desired.state?.disabled || [];
    const currentEnabled = new Set(current.enabled || []);

    const toEnable = shouldBeEnabled.filter((svc) => !currentEnabled.has(svc));
    const toDisable = shouldBeDisabled.filter((svc) => currentEnabled.has(svc));

    if (toEnable.length === 0 && toDisable.length === 0) {
      return { success: true, note: 'No service changes needed' };
    }

    const steps = [];
    if (toEnable.length > 0) steps.push(`systemctl enable --now ${toEnable.join(' ')}`);
    if (toDisable.length > 0) steps.push(`systemctl disable --now ${toDisable.join(' ')}`);

    const result = sshExec(host, steps.join(' && '), { timeout: 60000 });
    const outcome = { success: result.success, output: result.stdout, enabled: toEnable, disabled: toDisable };
    if (!result.success) outcome.error = describeFailure(result);
    return outcome;
  },

  firewall: async (host, desired, current) => {
    // For now, just report - firewall requires careful handling
    return { success: true, note: 'Firewall changes require manual review' };
  },

  vpn: async (host, desired, current) => {
    // VPN configuration is complex - report only
    return { success: true, note: 'VPN changes require manual review' };
  },

  // Bring the monitoring agent to the desired state:
  //   deploy    - nothing installed: run deploy.sh locally
  //   migrate   - legacy host-agent present: migrate on the host, then deploy
  //   reconcile - current service present: reconcile it to "enabled"
  agent: async (host, desired, current) => {
    const target = applierSshTarget(host);
    const monitorDir = join(__dirname, '../codebase/features/status-dashboard/host-status-monitor');
    const deployScript = join(monitorDir, 'deploy.sh');

    // Determine what action to take
    let action = 'reconcile';
    if (current.legacy) {
      action = 'migrate';
    } else if (current.status === 'not-installed') {
      action = 'deploy';
    }

    // deploy.sh always runs locally (it handles SSH internally).
    const runDeploy = (banner) => {
      console.log(banner);
      if (!existsSync(deployScript)) {
        return { success: false, error: 'deploy.sh not found' };
      }

      const deployResult = spawnSync('bash', [deployScript, host.id], {
        encoding: 'utf-8',
        timeout: 300000,
        cwd: monitorDir,
      });

      if (VERBOSE || deployResult.status !== 0) {
        console.log(` ${DIM}${deployResult.stdout}${NC}`);
        if (deployResult.stderr) console.log(` ${RED}${deployResult.stderr}${NC}`);
      }

      if (deployResult.status !== 0) {
        return { success: false, output: deployResult.stdout, action, error: describeFailure(deployResult) };
      }
      return { success: true, output: deployResult.stdout, action };
    };

    if (action === 'deploy') {
      return runDeploy(` Deploying to ${host.id}...`);
    }

    // Read the host-status-monitor.sh script for migrate/reconcile
    const agentScript = join(__dirname, '../reconciliation/services/host-status-monitor.sh');
    if (!existsSync(agentScript)) {
      return { success: false, error: 'host-status-monitor.sh not found' };
    }
    const script = readFileSync(agentScript, 'utf-8');

    if (target.hostname === 'localhost') {
      // Local execution for migrate/reconcile
      console.log(` Running ${action} locally...`);

      // The deploy directory is handed over through the environment. Writing a
      // bash ${...} expansion inside a JS template literal would be evaluated
      // by JavaScript and throw a ReferenceError. The program is assembled
      // line by line so the heredoc terminator stays at column 0.
      const localProgram = [
        "source <(cat <<'SCRIPT'",
        script,
        'SCRIPT',
        ')',
        `case "${action}" in`,
        '  migrate)',
        `    host_status_monitor_migrate "${host.id}"`,
        '    # After migration, run deploy',
        `    cd "$HOST_STATUS_MONITOR_DIR" && ./deploy.sh "${host.id}"`,
        '    ;;',
        '  reconcile)',
        `    host_status_monitor_reconcile "${host.id}" "enabled"`,
        '    ;;',
        'esac',
      ].join('\n');

      const localResult = spawnSync('bash', ['-c', localProgram], {
        encoding: 'utf-8',
        timeout: 300000,
        cwd: __dirname,
        env: { ...process.env, RECONCILE_ROOT: __dirname, HOST_STATUS_MONITOR_DIR: monitorDir },
      });

      if (VERBOSE) {
        console.log(` ${DIM}${localResult.stdout}${NC}`);
      }

      if (localResult.status !== 0) {
        return { success: false, output: localResult.stdout, action, error: describeFailure(localResult) };
      }
      return { success: true, output: localResult.stdout, action };
    }

    // Remote execution for migrate/reconcile
    console.log(` Running ${action} on ${target.hostname}...`);

    // Use sudo for non-root users
    const bashCmd = needsSudo(host) ? 'sudo bash -s' : 'bash -s';
    const sshArgs = applierSshArgs(target, ['StrictHostKeyChecking=no', 'BatchMode=yes'], bashCmd);

    // Build remote script for migrate/reconcile: the function library first,
    // then the dispatch on the chosen action.
    const remoteScript = [
      script,
      '',
      `case "${action}" in`,
      '  migrate)',
      `    host_status_monitor_migrate "${host.id}"`,
      '    ;;',
      '  reconcile)',
      `    host_status_monitor_reconcile "${host.id}" "enabled"`,
      '    ;;',
      'esac',
      '',
    ].join('\n');

    const sshResult = spawnSync('ssh', sshArgs, {
      input: remoteScript,
      encoding: 'utf-8',
      timeout: 300000,
    });

    if (VERBOSE) {
      console.log(` ${DIM}${sshResult.stdout}${NC}`);
    }

    // For migrate, also run deploy.sh locally after remote migration
    if (action === 'migrate' && sshResult.status === 0) {
      return runDeploy(` Deploying new service to ${host.id}...`);
    }

    if (sshResult.status !== 0) {
      return { success: false, output: sshResult.stdout, action, error: describeFailure(sshResult) };
    }
    return { success: true, output: sshResult.stdout, action };
  },
};
|
|
|
|
/**
 * Compare current state to desired state.
 *
 * Returns one `{ field, current, desired }` entry per detected difference; an
 * empty array means the feature is in its desired state. Features without
 * comparison rules (firewall, vpn) always yield no differences.
 *
 * @param {string} feature - Feature name ('hostname', 'packages', 'services', 'agent', ...).
 * @param {object} desired - Feature config from the inventory (reads `desired.state`).
 * @param {object} current - Probe result for the feature.
 * @returns {Array<{field: string, current: *, desired: *}>} Detected drift.
 */
function compareState(feature, desired, current) {
  const diffs = [];

  switch (feature) {
    case 'hostname': {
      for (const field of ['short', 'fqdn']) {
        const wanted = desired.state?.[field];
        if (current[field] !== wanted) {
          diffs.push({ field, current: current[field], desired: wanted });
        }
      }
      break;
    }

    case 'packages': {
      const installed = new Set(current.installed || []);
      const missing = (desired.state?.required || []).filter((pkg) => !installed.has(pkg));
      const unwanted = (desired.state?.forbidden || []).filter((pkg) => installed.has(pkg));

      if (missing.length > 0) {
        diffs.push({ field: 'missing', current: 'not installed', desired: missing.join(', ') });
      }
      // Forbidden packages that are installed are drift too. The packages
      // applier removes them, but it only runs when drift is reported.
      if (unwanted.length > 0) {
        diffs.push({ field: 'forbidden', current: unwanted.join(', '), desired: 'not installed' });
      }
      break;
    }

    case 'services': {
      const currentEnabled = new Set(current.enabled || []);
      const notEnabled = (desired.state?.enabled || []).filter((svc) => !currentEnabled.has(svc));

      if (notEnabled.length > 0) {
        diffs.push({ field: 'not_enabled', current: 'disabled', desired: notEnabled.join(', ') });
      }
      break;
    }

    case 'agent': {
      if (current.legacy) {
        // Legacy service that needs migration
        diffs.push({ field: 'service', current: 'host-agent (legacy)', desired: 'host-status-monitor' });
      } else if (current.status === 'not-installed') {
        diffs.push({ field: 'status', current: 'not installed', desired: 'active' });
      } else if (!current.running) {
        diffs.push({ field: 'status', current: current.status, desired: 'active' });
      }
      break;
    }

    default:
      break;
  }

  return diffs;
}
|
|
|
|
/**
 * Create SSH executor bound to a host (for verification/rollback engines).
 *
 * @param {object} host - Parsed host inventory entry.
 * @returns {function(string, object=): object} `(command, options)` executor
 *   that forwards to sshExec with `host` pre-filled.
 */
function createSshExecForHost(host) {
  return function boundSshExec(command, options = {}) {
    return sshExec(host, command, options);
  };
}
|
|
|
|
/**
 * Initialize verification and rollback engines.
 *
 * The verification engine is built from the probe table. The module-level
 * rollback executor is built from the applier table with no SSH executor
 * (second argument is null).
 */
const verificationEngine = new VerificationEngine(probes);
const rollbackExecutor = new RollbackExecutor(appliers, null);
|
|
|
|
/**
 * Handle verification failure - report it, then roll back only when
 * --auto-rollback was given.
 *
 * Outcomes:
 *   --no-rollback       transaction aborted, nothing restored
 *   no rollback flag    transaction aborted, user is pointed at --auto-rollback
 *   --auto-rollback     latest snapshot is restored through a host-bound
 *                       RollbackExecutor; the transaction is marked rolled
 *                       back on success and aborted on failure
 *
 * @param {object} host - Parsed host inventory entry.
 * @param {string} hostFile - Path of the host's inventory file (currently unused).
 * @param {object} transaction - Active transaction (reads `appliedFeatures`).
 * @param {object} verifyResults - Verification result (reads `results.failed`).
 * @returns {Promise<boolean>} true only when a rollback ran and succeeded.
 */
async function handleVerifyFailure(host, hostFile, transaction, verifyResults) {
  console.log(` ${RED}✗ VERIFICATION FAILED${NC}`);

  for (const failure of verifyResults.results.failed) {
    console.log(` ${failure.feature}: ${failure.reason}`);
    for (const diff of failure.diffs || []) {
      console.log(` ${diff.field}: expected ${MAGENTA}${diff.expected}${NC}, got ${DIM}${diff.actual}${NC}`);
    }
  }

  // The abort calls are awaited like the commit and rollback calls, so that
  // a failure to abort is not silently dropped if the manager is asynchronous.
  if (NO_ROLLBACK) {
    console.log(` ${YELLOW}⚠ Rollback disabled (--no-rollback)${NC}`);
    await transactionManager.abortTransaction(host.id, 'verification-failed-no-rollback');
    return false;
  }

  // Check rollbackability
  const appliedFeatureNames = transaction.appliedFeatures.map((applied) => applied.feature);
  const rollbackAnalysis = rollbackExecutor.analyzeRollbackability(appliedFeatureNames);

  if (rollbackAnalysis.irreversible.length > 0) {
    console.log(` ${YELLOW}⚠ Some features cannot be rolled back: ${rollbackAnalysis.irreversible.join(', ')}${NC}`);
  }

  if (!AUTO_ROLLBACK) {
    // In non-interactive mode, don't rollback unless explicitly requested
    console.log(` ${YELLOW}⚠ Use --auto-rollback to automatically restore previous state${NC}`);
    await transactionManager.abortTransaction(host.id, 'verification-failed-user-declined');
    return false;
  }

  console.log(` ${BLUE}Rolling back to previous state...${NC}`);

  const snapshot = snapshotManager.getLatestSnapshot(host.id);
  if (!snapshot) {
    console.log(` ${RED}✗ No snapshot available for rollback${NC}`);
    await transactionManager.abortTransaction(host.id, 'no-snapshot-for-rollback');
    return false;
  }

  // The module-level executor has no SSH access; build one bound to this host.
  const hostSshExec = createSshExecForHost(host);
  const localRollbackExecutor = new RollbackExecutor(appliers, hostSshExec);

  const rollbackResult = await localRollbackExecutor.executeRollback(
    host,
    snapshot.hostData,
    transaction.appliedFeatures,
  );

  if (!rollbackResult.success) {
    console.log(` ${RED}✗ Rollback failed${NC}`);
    console.log(` ${rollbackResult.summary}`);
    await transactionManager.abortTransaction(host.id, 'rollback-failed');
    return false;
  }

  console.log(` ${GREEN}✓ Rolled back successfully${NC}`);
  console.log(` ${rollbackResult.summary}`);
  stats.rolledBack++;
  await transactionManager.rollbackTransaction(host.id, 'verification-failed', rollbackResult);
  return true;
}
|
|
|
|
/**
|
|
* Process a single host
|
|
*/
|
|
async function processHost(hostFile) {
|
|
const content = readFileSync(hostFile, 'utf-8');
|
|
const host = parseYaml(content);
|
|
|
|
if (HOST_FILTER && host.id !== HOST_FILTER) {
|
|
return null;
|
|
}
|
|
|
|
stats.hosts++;
|
|
|
|
console.log(`\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}`);
|
|
console.log(`${CYAN}[${host.id}]${NC} ${host.fqdn}`);
|
|
console.log(`${DIM} OS: ${host.os?.name} ${host.os?.version} | Method: ${host.hostnameMethod}${NC}`);
|
|
|
|
// Test SSH connectivity first (no sudo needed for simple ping)
|
|
const pingResult = sshExec(host, 'echo ok', { noSudo: true });
|
|
if (!pingResult.success || pingResult.stdout !== 'ok') {
|
|
console.log(` ${RED}✗ Unreachable${NC}`);
|
|
stats.unreachable++;
|
|
return { hostId: host.id, status: 'unreachable' };
|
|
}
|
|
|
|
const features = host.features || {};
|
|
const results = [];
|
|
const appliedWithHashes = []; // Track features with expected state for verification
|
|
const reconciliation = {
|
|
lastRun: new Date().toISOString(),
|
|
lastSuccess: null,
|
|
status: 'pending',
|
|
appliedFeatures: [],
|
|
failedFeatures: [],
|
|
drift: [],
|
|
verification: {
|
|
enabled: true,
|
|
verificationStatus: 'pending',
|
|
featureHashes: {},
|
|
},
|
|
};
|
|
|
|
// === PHASE 1: Initial probe for all features (for snapshot) ===
|
|
const probeResults = {};
|
|
const featureList = ['hostname', 'packages', 'services', 'firewall', 'vpn', 'agent'];
|
|
|
|
for (const featureName of featureList) {
|
|
const featureConfig = features[featureName];
|
|
if (!featureConfig?.enabled) continue;
|
|
|
|
const probe = probes[featureName];
|
|
if (probe) {
|
|
probeResults[featureName] = await probe(host);
|
|
}
|
|
}
|
|
|
|
// === PHASE 2: Begin transaction (snapshot pre-state) ===
|
|
let transaction = null;
|
|
if (APPLY_MODE && !VERIFY_ONLY) {
|
|
try {
|
|
transaction = await transactionManager.beginTransaction(host.id, hostFile, host, probeResults);
|
|
if (VERBOSE) console.log(` ${DIM}Transaction started: ${transaction.snapshot.timestamp}${NC}`);
|
|
} catch (err) {
|
|
console.log(` ${RED}✗ Could not start transaction: ${err.message}${NC}`);
|
|
return { hostId: host.id, status: 'locked' };
|
|
}
|
|
}
|
|
|
|
// === PHASE 3: Process each feature ===
|
|
for (const featureName of featureList) {
|
|
if (FEATURE_FILTER && featureName !== FEATURE_FILTER) continue;
|
|
|
|
const featureConfig = features[featureName];
|
|
if (!featureConfig?.enabled) {
|
|
if (VERBOSE) console.log(` ${DIM}[${featureName}] Disabled${NC}`);
|
|
continue;
|
|
}
|
|
|
|
stats.features++;
|
|
console.log(` ${BLUE}[${featureName}]${NC}`);
|
|
|
|
// Use cached probe result or re-probe
|
|
const current = probeResults[featureName] || await probes[featureName]?.(host);
|
|
if (!current || current.error) {
|
|
console.log(` ${RED}Probe failed: ${current?.error || 'No probe'}${NC}`);
|
|
stats.failed++;
|
|
reconciliation.failedFeatures.push({ feature: featureName, error: current?.error || 'No probe' });
|
|
continue;
|
|
}
|
|
|
|
// Compare to desired state
|
|
const diffs = compareState(featureName, featureConfig, current);
|
|
|
|
if (diffs.length === 0) {
|
|
console.log(` ${GREEN}✓ OK${NC}`);
|
|
stats.ok++;
|
|
|
|
// Hash current state for verification baseline
|
|
const stateHash = hashFeatureState(current);
|
|
reconciliation.verification.featureHashes[featureName] = stateHash;
|
|
reconciliation.appliedFeatures.push(featureName);
|
|
continue;
|
|
}
|
|
|
|
// Report drift
|
|
console.log(` ${YELLOW}⚠ Drift detected:${NC}`);
|
|
for (const diff of diffs) {
|
|
console.log(` ${diff.field}: ${DIM}${diff.current}${NC} → ${MAGENTA}${diff.desired}${NC}`);
|
|
reconciliation.drift.push({
|
|
feature: featureName,
|
|
field: diff.field,
|
|
expected: diff.desired,
|
|
actual: diff.current,
|
|
});
|
|
}
|
|
stats.drift++;
|
|
|
|
// Apply fix if in apply mode (and not verify-only)
|
|
if (APPLY_MODE && !VERIFY_ONLY) {
|
|
const applier = appliers[featureName];
|
|
if (!applier) {
|
|
console.log(` ${YELLOW}No applier available${NC}`);
|
|
continue;
|
|
}
|
|
|
|
console.log(` ${BLUE}Applying fix...${NC}`);
|
|
const applyResult = await applier(host, featureConfig, current);
|
|
|
|
if (applyResult.success) {
|
|
console.log(` ${GREEN}✓ Fixed${NC}`);
|
|
stats.fixed++;
|
|
|
|
// Hash expected state for verification
|
|
const expectedState = featureConfig.state;
|
|
const expectedHash = hashFeatureState(expectedState);
|
|
reconciliation.verification.featureHashes[featureName] = expectedHash;
|
|
|
|
// Track for verification
|
|
appliedWithHashes.push({
|
|
feature: featureName,
|
|
expectedState,
|
|
expectedHash,
|
|
});
|
|
|
|
// Record in transaction
|
|
if (transaction) {
|
|
transactionManager.recordFeatureApplied(host.id, featureName, expectedHash, applyResult);
|
|
}
|
|
|
|
reconciliation.appliedFeatures.push(featureName);
|
|
} else {
|
|
console.log(` ${RED}✗ Failed: ${applyResult.error || 'Unknown error'}${NC}`);
|
|
stats.failed++;
|
|
reconciliation.failedFeatures.push({ feature: featureName, error: applyResult.error });
|
|
}
|
|
}
|
|
|
|
results.push({ feature: featureName, diffs, status: APPLY_MODE ? 'applied' : 'drift' });
|
|
}
|
|
|
|
// === PHASE 4: Verification (re-probe and validate) ===
|
|
if ((APPLY_MODE || VERIFY_ONLY) && appliedWithHashes.length > 0) {
|
|
console.log(` ${BLUE}[Verification]${NC}`);
|
|
|
|
const hostSshExec = createSshExecForHost(host);
|
|
const verifyResults = await verificationEngine.verifyTransaction(host, appliedWithHashes, hostSshExec);
|
|
|
|
if (verifyResults.success) {
|
|
console.log(` ${GREEN}✓ All changes verified${NC}`);
|
|
stats.verified++;
|
|
reconciliation.verification.verificationStatus = 'verified';
|
|
reconciliation.verification.lastVerified = new Date().toISOString();
|
|
|
|
// Compute transaction hash
|
|
reconciliation.verification.transactionHash = hashTransactionState(reconciliation.verification.featureHashes);
|
|
|
|
// Commit transaction
|
|
if (transaction) {
|
|
await transactionManager.commitTransaction(host.id, verifyResults);
|
|
}
|
|
} else {
|
|
stats.verifyFailed++;
|
|
reconciliation.verification.verificationStatus = 'failed';
|
|
|
|
// Handle verification failure (rollback if enabled)
|
|
if (transaction) {
|
|
await handleVerifyFailure(host, hostFile, transaction, verifyResults);
|
|
}
|
|
}
|
|
} else if (VERIFY_ONLY) {
|
|
console.log(` ${DIM}[Verification] No changes to verify${NC}`);
|
|
reconciliation.verification.verificationStatus = 'skipped';
|
|
}
|
|
|
|
// === PHASE 5: Update reconciliation state in YAML ===
|
|
if (APPLY_MODE || VERIFY_ONLY) {
|
|
reconciliation.status = reconciliation.failedFeatures.length === 0
|
|
? (reconciliation.verification.verificationStatus === 'verified' ? 'success' : 'partial')
|
|
: 'partial';
|
|
|
|
if (reconciliation.status === 'success') {
|
|
reconciliation.lastSuccess = reconciliation.lastRun;
|
|
}
|
|
|
|
host.reconciliation = reconciliation;
|
|
|
|
// Write back to file
|
|
const updatedContent = stringifyYaml(host, { lineWidth: 0 });
|
|
writeFileSync(hostFile, updatedContent);
|
|
if (VERBOSE) console.log(` ${DIM}Updated ${hostFile}${NC}`);
|
|
}
|
|
|
|
return { hostId: host.id, results, reconciliation };
|
|
}
|
|
|
|
/**
 * Recursively collect host YAML files beneath a directory.
 *
 * Every sub-directory is descended into except ones named `schema`. Any
 * other entry is kept when its name ends in `.yaml` and is not `index.yaml`.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths built by joining `dir` with each matching entry
 *   name, sorted with the default string ordering. Empty when `dir` does not
 *   exist.
 */
function findHostFiles(dir) {
  const files = [];

  // A missing directory is treated as "no hosts" rather than an error.
  if (!existsSync(dir)) return files;

  const entries = readdirSync(dir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory() && entry.name !== 'schema') {
      files.push(...findHostFiles(fullPath));
    } else if (entry.name.endsWith('.yaml') && entry.name !== 'index.yaml') {
      // A directory named `schema` also lands in this branch, but its name
      // does not end in `.yaml`, so it is dropped here.
      files.push(fullPath);
    }
  }

  // Sort once per level so the returned order does not depend on the order
  // in which readdirSync lists entries.
  return files.sort();
}
|
|
|
|
// Main
//
// Top-level driver: print the run banner, reconcile every host file found
// under INVENTORY_PATH, print the summary counters, then exit with a status
// derived from the number of problems recorded in `stats`.
console.log(`${BLUE}╔════════════════════════════════════════════════════════════╗${NC}`);
console.log(`${BLUE}║${NC} Infrastructure Reconciliation Engine ${BLUE}║${NC}`);
console.log(`${BLUE}╚════════════════════════════════════════════════════════════╝${NC}`);
console.log('');

// VERIFY_ONLY takes precedence over APPLY_MODE when labelling the run.
const modeLabel = VERIFY_ONLY ? `${CYAN}VERIFY-ONLY${NC}` : APPLY_MODE ? `${GREEN}AUTO-APPLY${NC}` : `${YELLOW}DRY-RUN${NC}`;
console.log(` Mode: ${modeLabel}`);
console.log(` Hosts: ${HOST_FILTER ? `${CYAN}${HOST_FILTER}${NC}` : `${GREEN}all${NC}`}`);
console.log(` Features: ${FEATURE_FILTER ? `${CYAN}${FEATURE_FILTER}${NC}` : `${GREEN}all${NC}`}`);
if (APPLY_MODE) {
  // AUTO_ROLLBACK wins over NO_ROLLBACK; with neither set the label is "prompt".
  const rollbackMode = AUTO_ROLLBACK ? `${GREEN}auto${NC}` : NO_ROLLBACK ? `${YELLOW}disabled${NC}` : `${DIM}prompt${NC}`;
  console.log(` Rollback: ${rollbackMode}`);
}

const hostFiles = findHostFiles(INVENTORY_PATH);

// Hosts are handled one at a time: each processHost call is awaited before
// the next one starts.
for (const hostFile of hostFiles) {
  await processHost(hostFile);
}

// Summary
console.log('');
console.log(`${BLUE}╔════════════════════════════════════════════════════════════╗${NC}`);
console.log(`${BLUE}║${NC} Summary ${BLUE}║${NC}`);
console.log(`${BLUE}╚════════════════════════════════════════════════════════════╝${NC}`);
console.log(` Hosts processed: ${stats.hosts}`);
console.log(` Features checked: ${stats.features}`);
console.log(` ${GREEN}OK: ${stats.ok}${NC}`);
console.log(` ${YELLOW}Drift detected: ${stats.drift}${NC}`);
if (APPLY_MODE) {
  console.log(` ${GREEN}Fixed: ${stats.fixed}${NC}`);
  console.log(` ${GREEN}Verified: ${stats.verified}${NC}`);
  if (stats.verifyFailed > 0) {
    console.log(` ${RED}Verify failed: ${stats.verifyFailed}${NC}`);
  }
  if (stats.rolledBack > 0) {
    console.log(` ${YELLOW}Rolled back: ${stats.rolledBack}${NC}`);
  }
}
console.log(` ${RED}Failed: ${stats.failed}${NC}`);
console.log(` ${RED}Unreachable: ${stats.unreachable}${NC}`);
console.log(` ${DIM}Skipped: ${stats.skipped}${NC}`);
console.log('');

if (!APPLY_MODE && stats.drift > 0) {
  // The usage header of this file documents `--apply` as the flag that
  // applies fixes; the hint previously named `--all --auto`.
  console.log(`${CYAN}Run with --apply to fix detected drift${NC}`);
}
if (APPLY_MODE && stats.verifyFailed > 0 && !AUTO_ROLLBACK && !NO_ROLLBACK) {
  console.log(`${CYAN}Use --auto-rollback for automatic rollback on verification failure${NC}`);
}

// Exit non-zero when anything drifted, failed, or failed verification.
// Only the low 8 bits of an exit status reach the parent process, so an
// unclamped count of 256 (or any multiple) would be reported as success.
const issueCount = stats.drift + stats.failed + stats.verifyFailed;
process.exit(Math.min(issueCount, 255));
|