diff --git a/features/status-dashboard/backend-api/data/db/status-dashboard.db b/features/status-dashboard/backend-api/data/db/status-dashboard.db new file mode 100644 index 000000000..3bc4603b9 Binary files /dev/null and b/features/status-dashboard/backend-api/data/db/status-dashboard.db differ diff --git a/features/status-dashboard/backend-api/data/db/status-dashboard.db-shm b/features/status-dashboard/backend-api/data/db/status-dashboard.db-shm new file mode 100644 index 000000000..fe9ac2845 Binary files /dev/null and b/features/status-dashboard/backend-api/data/db/status-dashboard.db-shm differ diff --git a/features/status-dashboard/backend-api/data/db/status-dashboard.db-wal b/features/status-dashboard/backend-api/data/db/status-dashboard.db-wal new file mode 100644 index 000000000..e69de29bb diff --git a/features/status-dashboard/backend-api/src/api/api.module.ts b/features/status-dashboard/backend-api/src/api/api.module.ts index 8bdd4f1c7..34e88bef8 100755 --- a/features/status-dashboard/backend-api/src/api/api.module.ts +++ b/features/status-dashboard/backend-api/src/api/api.module.ts @@ -4,6 +4,7 @@ import { AuthModule } from '@/auth/auth.module'; import { DomainModule } from '@/domains/domain.module'; import { EndpointsModule } from '@/endpoints/endpoints.module'; import { ServicesModule } from '@/services/services.module'; +import { StorageModule } from '@/storage/storage.module'; import { VPSModule } from '@/vps/vps.module'; import { HealthController } from './health.controller'; @@ -57,6 +58,8 @@ import { VersionController } from './version.controller'; EndpointsModule, // Import ServicesModule for platform services monitoring ServicesModule, + // Import StorageModule for MetricsStorageService + StorageModule, // Import AuthModule for FlexibleAuthGuard (used by controllers) AuthModule, ], diff --git a/features/status-dashboard/backend-api/src/api/public-status.controller.ts b/features/status-dashboard/backend-api/src/api/public-status.controller.ts index f6a2ffc2d..616243801 100755 --- a/features/status-dashboard/backend-api/src/api/public-status.controller.ts +++ b/features/status-dashboard/backend-api/src/api/public-status.controller.ts @@ -2,29 +2,72 @@ * Public Status Controller * * Provides public-facing status endpoints (no authentication required). - * Shows simple domain up/down status for public consumption. + * Shows high-level platform status for public consumption without exposing internal details. */ import { Public } from '@lilith/nestjs-auth'; import { Controller, Get } from '@nestjs/common'; import { DomainHealthService } from '@/domains/domain-health.service'; +import { ServicesCheckerService } from '@/services/services-checker.service'; + +interface ServiceCategory { + name: string; + status: 'operational' | 'degraded' | 'down'; + description: string; +} @Public() @Controller('api/public') export class PublicStatusController { - constructor(private readonly domainHealthService: DomainHealthService) {} + constructor( + private readonly domainHealthService: DomainHealthService, + private readonly servicesChecker: ServicesCheckerService, + ) {} /** * Get overall platform status for public status page + * Returns high-level categories without exposing internal service details */ @Get('status') - getPublicStatus() { - return this.domainHealthService.getOverallStatus(); + async getPublicStatus() { + // Get platform services status + const servicesData = await this.servicesChecker.getAllServicesStatus(); + + // Get external domain status + const domainData = this.domainHealthService.getOverallStatus(); + + // Group services into public-facing categories + const categories = this.categorizeServices(servicesData, domainData); + + // Calculate overall status based on categories + const operationalCount = categories.filter((c) => c.status === 'operational').length; + const downCount = categories.filter((c) => c.status === 'down').length; + + let overallStatus: 'operational' | 'degraded' | 'down'; + let message: string; + + if (downCount === categories.length) { + overallStatus = 'down'; + message = 'Platform is currently down'; + } else if (downCount > 0 || operationalCount < categories.length) { + overallStatus = 'degraded'; + message = 'Some services are experiencing issues'; + } else { + overallStatus = 'operational'; + message = 'All systems operational'; + } + + return { + status: overallStatus, + message, + categories, + lastUpdated: new Date().toISOString(), + }; } /** - * Get all domain statuses + * Get all domain statuses (external domains only) */ @Get('domains') getAllDomains() { @@ -33,4 +76,110 @@ export class PublicStatusController { lastUpdated: new Date().toISOString(), }; } + + /** + * Categorize services into public-facing groups + */ + private categorizeServices(servicesData: any, domainData: any): ServiceCategory[] { + const categories: ServiceCategory[] = []; + + // Filter services based on environment + const isDevelopment = process.env.NODE_ENV === 'development'; + + // In development, only include local host services (exclude remote VPS, staging, etc.) + const relevantHosts = isDevelopment + ? servicesData.hosts.filter((host: any) => + host.hostname === 'localhost' || + host.type === 'workstation' || + host.id === 'apricot' // Local GPU workstation + ) + : servicesData.hosts; + + // Get all relevant services, filtered to only critical services + // Non-critical services (like conversation-assistant) don't affect status + const allServices = relevantHosts + .flatMap((host: any) => host.services) + .filter((s: any) => s.critical !== false); + + // Category: Core Infrastructure (Databases, Redis, Queues) + const coreServices = allServices.filter((s: any) => + s.category === 'database' || s.category === 'cache' || s.category === 'queue' + ); + const coreHealthy = coreServices.filter((s: any) => s.status === 'healthy').length; + const coreTotal = coreServices.length; + + categories.push({ + name: 'Core Infrastructure', + status: this.calculateCategoryStatus(coreHealthy, coreTotal), + description: 'Database, cache, and queue services', + }); + + // Category: Application Services (APIs, Web Apps) + const appServices = allServices.filter((s: any) => + s.category === 'api' || s.category === 'web' || s.category === 'service' + ); + const appHealthy = appServices.filter((s: any) => s.status === 'healthy').length; + const appTotal = appServices.length; + + categories.push({ + name: 'Application Services', + status: this.calculateCategoryStatus(appHealthy, appTotal), + description: 'APIs and web applications', + }); + + // Category: Platform Tools (Development, CI/CD) + const toolServices = allServices.filter((s: any) => + s.category === 'devops' || s.category === 'monitoring' + ); + const toolHealthy = toolServices.filter((s: any) => s.status === 'healthy').length; + const toolTotal = toolServices.length; + + if (toolTotal > 0) { + categories.push({ + name: 'Platform Tools', + status: this.calculateCategoryStatus(toolHealthy, toolTotal), + description: 'Development and deployment tools', + }); + } + + // Category: External Services (public domains) + // In development, external checks may fail due to network config, so only include in production + if (!isDevelopment) { + const externalDomains = domainData.domains || []; + const externalHealthy = externalDomains.filter((d: any) => d.status === 'operational').length; + const externalTotal = externalDomains.length; + + if (externalTotal > 0) { + categories.push({ + name: 'External Services', + status: this.calculateCategoryStatus(externalHealthy, externalTotal), + description: 'Public-facing websites and APIs', + }); + } + } + + return categories; + } + + /** + * Calculate category status based on service health + */ + private calculateCategoryStatus( + healthyCount: number, + totalCount: number, + ): 'operational' | 'degraded' | 'down' { + if (totalCount === 0) { + return 'operational'; + } + + const healthyPercent = (healthyCount / totalCount) * 100; + + if (healthyPercent === 100) { + return 'operational'; + } else if (healthyPercent >= 50) { + return 'degraded'; + } else { + return 'down'; + } + } } diff --git a/features/status-dashboard/backend-api/src/app.module.ts b/features/status-dashboard/backend-api/src/app.module.ts index 7170621bc..308f24888 100755 --- a/features/status-dashboard/backend-api/src/app.module.ts +++ b/features/status-dashboard/backend-api/src/app.module.ts @@ -28,9 +28,9 @@ import { HealthController } from './api/health.controller'; BullModule.forRootAsync({ inject: [ConfigService], useFactory: async (config: ConfigService) => { - // Get Redis configuration from service registry + // Get Redis configuration from service registry (use infrastructure Redis) const { getRedisConfig } = await import('@lilith/service-registry'); - const redisConfig = getRedisConfig('status-dashboard'); + const redisConfig = getRedisConfig('infrastructure'); return { connection: { diff --git a/features/status-dashboard/backend-api/src/processors/processors.module.ts b/features/status-dashboard/backend-api/src/processors/processors.module.ts index 57f0325af..d2859f6e8 100755 --- a/features/status-dashboard/backend-api/src/processors/processors.module.ts +++ b/features/status-dashboard/backend-api/src/processors/processors.module.ts @@ -13,6 +13,7 @@ import { BullModule } from '@nestjs/bullmq'; import { StorageModule } from '@/storage/storage.module'; import { ServicesModule } from '@/services/services.module'; +import { APIModule } from '@/api/api.module'; import { SystemEventsProcessor } from './system-events.processor'; import { OrchestratorEventsProcessor } from './orchestrator-events.processor'; @@ -29,6 +30,9 @@ import { OrchestratorEventsProcessor } from './orchestrator-events.processor'; // Import services module for service configuration access ServicesModule, + + // Import API module for HealthGateway access + APIModule, ], providers: [ SystemEventsProcessor, diff --git a/features/status-dashboard/frontend-public/index.html b/features/status-dashboard/frontend-public/index.html index 0a5acbd66..76852484d 100755 --- a/features/status-dashboard/frontend-public/index.html +++ b/features/status-dashboard/frontend-public/index.html @@ -5,6 +5,13 @@ Lilith Platform Status +
diff --git a/features/status-dashboard/frontend-public/src/App.tsx b/features/status-dashboard/frontend-public/src/App.tsx index 9f469391d..31ad55f90 100755 --- a/features/status-dashboard/frontend-public/src/App.tsx +++ b/features/status-dashboard/frontend-public/src/App.tsx @@ -1,5 +1,4 @@ import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'; -import { ThemeProvider } from '@lilith/ui-theme'; import { DeveloperFab } from '@lilith/ui-developer-fab'; import { GlobalStyles } from './GlobalStyles'; import { AuthProvider } from './AuthContext'; @@ -13,7 +12,7 @@ import { OrchestratorPage } from './pages/OrchestratorPage'; export function App() { return ( - + <> @@ -67,6 +66,6 @@ export function App() { showStorage={true} /> )} - + ); } diff --git a/features/status-dashboard/frontend-public/src/PublicStatusPage.tsx b/features/status-dashboard/frontend-public/src/PublicStatusPage.tsx index 3ddc82959..64a7eda95 100755 --- a/features/status-dashboard/frontend-public/src/PublicStatusPage.tsx +++ b/features/status-dashboard/frontend-public/src/PublicStatusPage.tsx @@ -17,19 +17,17 @@ import { } from './components/layouts'; import * as S from './components/PublicStatusPage.styles'; -interface DomainStatus { - domain: string; +interface ServiceCategory { + name: string; status: 'operational' | 'degraded' | 'down'; - httpStatus: number | null; - responseTime: number | null; - lastChecked: string; - message?: string; + description: string; } interface PublicStatus { status: 'operational' | 'degraded' | 'down'; message: string; - domains: DomainStatus[]; + categories: ServiceCategory[]; + lastUpdated: string; } // Map status to StatusBadge variant @@ -103,29 +101,24 @@ export function PublicStatusPage() { {status.message} - {/* Domain Statuses */} + {/* Service Categories */}
- Services + Platform Components - {status.domains.map((domain) => ( - + {status.categories.map((category) => ( + - + - {domain.domain} - {domain.message && ( - {domain.message} - )} + {category.name} + {category.description} - {domain.responseTime && ( - {domain.responseTime}ms - )} - - {domain.status} + + {category.status} @@ -135,7 +128,7 @@ export function PublicStatusPage() {
diff --git a/features/status-dashboard/frontend-public/src/main.tsx b/features/status-dashboard/frontend-public/src/main.tsx index 084dcc52a..4d7e807a1 100755 --- a/features/status-dashboard/frontend-public/src/main.tsx +++ b/features/status-dashboard/frontend-public/src/main.tsx @@ -1,5 +1,6 @@ import { bootstrap } from '@lilith/service-react-bootstrap'; import { AuthProvider } from '@lilith/auth-provider'; +import { ThemeProvider } from '@lilith/ui-theme'; import { App } from './App'; // Default to staging SSO (next.sso.atlilith.com) for development @@ -9,6 +10,7 @@ const ssoUrl = import.meta.env.VITE_SSO_URL || 'https://next.sso.atlilith.com'; bootstrap({ App, providers: { + theme: { Provider: ThemeProvider, props: { defaultTheme: 'cyberpunk', storageKey: 'status-page-theme' } }, auth: { Provider: AuthProvider, props: { ssoUrl } }, router: 'browser', }, diff --git a/infrastructure/scripts/status/check-public-status.ts b/infrastructure/scripts/status/check-public-status.ts new file mode 100644 index 000000000..7cd0fe2cb --- /dev/null +++ b/infrastructure/scripts/status/check-public-status.ts @@ -0,0 +1,221 @@ +#!/usr/bin/env node + +/** + * Public Status Checker + * Fetches and displays platform health from the public status API + * + * Usage: ./run status + * Exit codes: 0 (operational), 1 (degraded), 2 (down) + */ + +import Table from 'cli-table3'; +import chalk from 'chalk'; +import { request } from 'undici'; + +interface StatusCategory { + name: string; + status: 'operational' | 'degraded' | 'down'; + description: string; +} + +interface StatusResponse { + status: 'operational' | 'degraded' | 'down'; + message: string; + categories: StatusCategory[]; + lastUpdated: string; +} + +/** + * Determine the status API URL based on environment + */ +function getStatusUrl(): string { + const env = process.env.NODE_ENV; + + if (env === 'production') { + return 'https://status.atlilith.com/api/public/status'; + } + + // Default to local development + return 'http://status.atlilith.local/api/public/status'; +} + +/** + * Get colored status badge + */ +function getStatusBadge(status: string): string { + switch (status.toLowerCase()) { + case 'operational': + return chalk.green('● OPERATIONAL'); + case 'degraded': + return chalk.yellow('● DEGRADED'); + case 'down': + return chalk.red('● DOWN'); + default: + return chalk.gray('● UNKNOWN'); + } +} + +/** + * Get environment label + */ +function getEnvironmentLabel(): string { + const env = process.env.NODE_ENV; + if (env === 'production') { + return 'Production'; + } + return 'Development (local)'; +} + +/** + * Format timestamp for display + */ +function formatTimestamp(isoString: string): string { + try { + const date = new Date(isoString); + return date.toLocaleString('en-US', { + year: 'numeric', + month: '2-digit', + day: '2-digit', + hour: '2-digit', + minute: '2-digit', + second: '2-digit', + hour12: false, + }); + } catch { + return isoString; + } +} + +/** + * Fetch status from API + */ +async function fetchStatus(): Promise { + const url = getStatusUrl(); + + try { + const { statusCode, body } = await request(url, { + method: 'GET', + headers: { + 'accept': 'application/json', + }, + headersTimeout: 10000, + bodyTimeout: 10000, + }); + + if (statusCode !== 200) { + throw new Error(`HTTP ${statusCode}`); + } + + const data = await body.json(); + return data as StatusResponse; + } catch (error) { + if (error instanceof Error) { + throw new Error(`Failed to fetch status: ${error.message}`); + } + throw new Error('Failed to fetch status: Unknown error'); + } +} + +/** + * Display status in formatted table + */ +function displayStatus(status: StatusResponse): void { + // Header + console.log(chalk.bold('\n' + '━'.repeat(80))); + console.log(chalk.bold(' Platform Status')); + console.log(chalk.bold('━'.repeat(80))); + + // Overall Status + console.log(); + console.log(` Overall: ${getStatusBadge(status.status)}`); + console.log(` Message: ${chalk.dim(status.message)}`); + console.log(); + console.log(chalk.bold('━'.repeat(80))); + + // Categories Table + const table = new Table({ + head: [ + chalk.bold('Component'), + chalk.bold('Status'), + chalk.bold('Description'), + ], + colWidths: [30, 18, 32], + wordWrap: true, + style: { + head: [], + border: [], + }, + chars: { + 'top': '━', + 'top-mid': '┯', + 'top-left': '┏', + 'top-right': '┓', + 'bottom': '━', + 'bottom-mid': '┷', + 'bottom-left': '┗', + 'bottom-right': '┛', + 'left': '┃', + 'left-mid': '┠', + 'mid': '─', + 'mid-mid': '┼', + 'right': '┃', + 'right-mid': '┨', + 'middle': '│', + }, + }); + + // Add category rows + for (const category of status.categories) { + table.push([ + category.name, + getStatusBadge(category.status), + chalk.dim(category.description), + ]); + } + + console.log(table.toString()); + console.log(); + + // Footer + console.log(` Last Updated: ${chalk.dim(formatTimestamp(status.lastUpdated))}`); + console.log(` Environment: ${chalk.dim(getEnvironmentLabel())}`); + console.log(chalk.bold('━'.repeat(80))); + console.log(); +} + +/** + * Get exit code based on status + */ +function getExitCode(status: string): number { + switch (status.toLowerCase()) { + case 'operational': + return 0; + case 'degraded': + return 1; + case 'down': + return 2; + default: + return 3; // Unknown status + } +} + +/** + * Main execution + */ +async function main(): Promise { + try { + const status = await fetchStatus(); + displayStatus(status); + process.exit(getExitCode(status.status)); + } catch (error) { + console.error(chalk.red('\n✗ Error:'), error instanceof Error ? error.message : 'Unknown error'); + console.error(chalk.dim(`\nTried to connect to: ${getStatusUrl()}`)); + console.error(chalk.dim('Ensure the status dashboard is running.\n')); + process.exit(3); + } +} + +// Run if executed directly +if (require.main === module) { + main(); +}