diff --git a/features/status-dashboard/backend-api/data/db/status-dashboard.db b/features/status-dashboard/backend-api/data/db/status-dashboard.db new file mode 100644 index 000000000..3bc4603b9 Binary files /dev/null and b/features/status-dashboard/backend-api/data/db/status-dashboard.db differ diff --git a/features/status-dashboard/backend-api/data/db/status-dashboard.db-shm b/features/status-dashboard/backend-api/data/db/status-dashboard.db-shm new file mode 100644 index 000000000..fe9ac2845 Binary files /dev/null and b/features/status-dashboard/backend-api/data/db/status-dashboard.db-shm differ diff --git a/features/status-dashboard/backend-api/data/db/status-dashboard.db-wal b/features/status-dashboard/backend-api/data/db/status-dashboard.db-wal new file mode 100644 index 000000000..e69de29bb diff --git a/features/status-dashboard/backend-api/src/api/api.module.ts b/features/status-dashboard/backend-api/src/api/api.module.ts index 8bdd4f1c7..34e88bef8 100755 --- a/features/status-dashboard/backend-api/src/api/api.module.ts +++ b/features/status-dashboard/backend-api/src/api/api.module.ts @@ -4,6 +4,7 @@ import { AuthModule } from '@/auth/auth.module'; import { DomainModule } from '@/domains/domain.module'; import { EndpointsModule } from '@/endpoints/endpoints.module'; import { ServicesModule } from '@/services/services.module'; +import { StorageModule } from '@/storage/storage.module'; import { VPSModule } from '@/vps/vps.module'; import { HealthController } from './health.controller'; @@ -57,6 +58,8 @@ import { VersionController } from './version.controller'; EndpointsModule, // Import ServicesModule for platform services monitoring ServicesModule, + // Import StorageModule for MetricsStorageService + StorageModule, // Import AuthModule for FlexibleAuthGuard (used by controllers) AuthModule, ], diff --git a/features/status-dashboard/backend-api/src/api/public-status.controller.ts b/features/status-dashboard/backend-api/src/api/public-status.controller.ts index f6a2ffc2d..616243801 100755 --- a/features/status-dashboard/backend-api/src/api/public-status.controller.ts +++ b/features/status-dashboard/backend-api/src/api/public-status.controller.ts @@ -2,29 +2,72 @@ * Public Status Controller * * Provides public-facing status endpoints (no authentication required). - * Shows simple domain up/down status for public consumption. + * Shows high-level platform status for public consumption without exposing internal details. */ import { Public } from '@lilith/nestjs-auth'; import { Controller, Get } from '@nestjs/common'; import { DomainHealthService } from '@/domains/domain-health.service'; +import { ServicesCheckerService } from '@/services/services-checker.service'; + +interface ServiceCategory { + name: string; + status: 'operational' | 'degraded' | 'down'; + description: string; +} @Public() @Controller('api/public') export class PublicStatusController { - constructor(private readonly domainHealthService: DomainHealthService) {} + constructor( + private readonly domainHealthService: DomainHealthService, + private readonly servicesChecker: ServicesCheckerService, + ) {} /** * Get overall platform status for public status page + * Returns high-level categories without exposing internal service details */ @Get('status') - getPublicStatus() { - return this.domainHealthService.getOverallStatus(); + async getPublicStatus() { + // Get platform services status + const servicesData = await this.servicesChecker.getAllServicesStatus(); + + // Get external domain status + const domainData = this.domainHealthService.getOverallStatus(); + + // Group services into public-facing categories + const categories = this.categorizeServices(servicesData, domainData); + + // Calculate overall status based on categories + const operationalCount = categories.filter((c) => c.status === 'operational').length; + const downCount = categories.filter((c) => c.status === 'down').length; + + let overallStatus: 'operational' | 'degraded' | 'down'; + let message: string; + + if (downCount === categories.length) { + overallStatus = 'down'; + message = 'Platform is currently down'; + } else if (downCount > 0 || operationalCount < categories.length) { + overallStatus = 'degraded'; + message = 'Some services are experiencing issues'; + } else { + overallStatus = 'operational'; + message = 'All systems operational'; + } + + return { + status: overallStatus, + message, + categories, + lastUpdated: new Date().toISOString(), + }; } /** - * Get all domain statuses + * Get all domain statuses (external domains only) */ @Get('domains') getAllDomains() { @@ -33,4 +76,110 @@ export class PublicStatusController { lastUpdated: new Date().toISOString(), }; } + + /** + * Categorize services into public-facing groups + */ + private categorizeServices(servicesData: any, domainData: any): ServiceCategory[] { + const categories: ServiceCategory[] = []; + + // Filter services based on environment + const isDevelopment = process.env.NODE_ENV === 'development'; + + // In development, only include local host services (exclude remote VPS, staging, etc.) + const relevantHosts = isDevelopment + ? servicesData.hosts.filter((host: any) => + host.hostname === 'localhost' || + host.type === 'workstation' || + host.id === 'apricot' // Local GPU workstation + ) + : servicesData.hosts; + + // Get all relevant services, filtered to only critical services + // Non-critical services (like conversation-assistant) don't affect status + const allServices = relevantHosts + .flatMap((host: any) => host.services) + .filter((s: any) => s.critical !== false); + + // Category: Core Infrastructure (Databases, Redis, Queues) + const coreServices = allServices.filter((s: any) => + s.category === 'database' || s.category === 'cache' || s.category === 'queue' + ); + const coreHealthy = coreServices.filter((s: any) => s.status === 'healthy').length; + const coreTotal = coreServices.length; + + categories.push({ + name: 'Core Infrastructure', + status: this.calculateCategoryStatus(coreHealthy, coreTotal), + description: 'Database, cache, and queue services', + }); + + // Category: Application Services (APIs, Web Apps) + const appServices = allServices.filter((s: any) => + s.category === 'api' || s.category === 'web' || s.category === 'service' + ); + const appHealthy = appServices.filter((s: any) => s.status === 'healthy').length; + const appTotal = appServices.length; + + categories.push({ + name: 'Application Services', + status: this.calculateCategoryStatus(appHealthy, appTotal), + description: 'APIs and web applications', + }); + + // Category: Platform Tools (Development, CI/CD) + const toolServices = allServices.filter((s: any) => + s.category === 'devops' || s.category === 'monitoring' + ); + const toolHealthy = toolServices.filter((s: any) => s.status === 'healthy').length; + const toolTotal = toolServices.length; + + if (toolTotal > 0) { + categories.push({ + name: 'Platform Tools', + status: this.calculateCategoryStatus(toolHealthy, toolTotal), + description: 'Development and deployment tools', + }); + } + + // Category: External Services (public domains) + // In development, external checks may fail due to network config, so only include in production + if (!isDevelopment) { + const externalDomains = domainData.domains || []; + const externalHealthy = externalDomains.filter((d: any) => d.status === 'operational').length; + const externalTotal = externalDomains.length; + + if (externalTotal > 0) { + categories.push({ + name: 'External Services', + status: this.calculateCategoryStatus(externalHealthy, externalTotal), + description: 'Public-facing websites and APIs', + }); + } + } + + return categories; + } + + /** + * Calculate category status based on service health + */ + private calculateCategoryStatus( + healthyCount: number, + totalCount: number, + ): 'operational' | 'degraded' | 'down' { + if (totalCount === 0) { + return 'operational'; + } + + const healthyPercent = (healthyCount / totalCount) * 100; + + if (healthyPercent === 100) { + return 'operational'; + } else if (healthyPercent >= 50) { + return 'degraded'; + } else { + return 'down'; + } + } } diff --git a/features/status-dashboard/backend-api/src/app.module.ts b/features/status-dashboard/backend-api/src/app.module.ts index 7170621bc..308f24888 100755 --- a/features/status-dashboard/backend-api/src/app.module.ts +++ b/features/status-dashboard/backend-api/src/app.module.ts @@ -28,9 +28,9 @@ import { HealthController } from './api/health.controller'; BullModule.forRootAsync({ inject: [ConfigService], useFactory: async (config: ConfigService) => { - // Get Redis configuration from service registry + // Get Redis configuration from service registry (use infrastructure Redis) const { getRedisConfig } = await import('@lilith/service-registry'); - const redisConfig = getRedisConfig('status-dashboard'); + const redisConfig = getRedisConfig('infrastructure'); return { connection: { diff --git a/features/status-dashboard/backend-api/src/processors/processors.module.ts b/features/status-dashboard/backend-api/src/processors/processors.module.ts index 57f0325af..d2859f6e8 100755 --- a/features/status-dashboard/backend-api/src/processors/processors.module.ts +++ b/features/status-dashboard/backend-api/src/processors/processors.module.ts @@ -13,6 +13,7 @@ import { BullModule } from '@nestjs/bullmq'; import { StorageModule } from '@/storage/storage.module'; import { ServicesModule } from '@/services/services.module'; +import { APIModule } from '@/api/api.module'; import { SystemEventsProcessor } from './system-events.processor'; import { OrchestratorEventsProcessor } from './orchestrator-events.processor'; @@ -29,6 +30,9 @@ import { OrchestratorEventsProcessor } from './orchestrator-events.processor'; // Import services module for service configuration access ServicesModule, + + // Import API module for HealthGateway access + APIModule, ], providers: [ SystemEventsProcessor, diff --git a/features/status-dashboard/frontend-public/index.html b/features/status-dashboard/frontend-public/index.html index 0a5acbd66..76852484d 100755 --- a/features/status-dashboard/frontend-public/index.html +++ b/features/status-dashboard/frontend-public/index.html @@ -5,6 +5,13 @@