feat(prospector): runtime config for GPU idle auto-teardown
Some checks are pending
CI / verify (push) Waiting to run

The idle-teardown sweep was env-only (GPU_IDLE_TIMEOUT_MINUTES). Make it
operator-configurable at runtime via the settings singleton (like the
GO/PAUSE/AWAY kill-switch), editable from the Hosts view.

- migration 0013: gpu_idle_shutdown_enabled (bool, default true) +
  gpu_idle_timeout_minutes (int, default 30) on prospector_settings.
- settings PUT accepts the two fields; empty-patch guard relaxed.
- gpu.service idle sweep reads settings: skips teardown when disabled,
  uses the persisted minutes (env then 30 as fallback); status() reports
  the effective enabled flag + minutes (GpuModule imports SettingsModule,
  one-way, no cycle).
- Hosts view: idle auto-teardown panel (toggle + minutes + save) showing
  the server's effective state. Verified live: settings/gpu-status expose
  the fields, PUT persists, UI renders.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Natalie 2026-06-29 18:24:47 -04:00
parent 94ee8096f4
commit 9c4aaf8186
11 changed files with 187 additions and 22 deletions

View file

@ -0,0 +1,9 @@
-- Operator-configurable GPU idle auto-shutdown. Previously the idle-teardown
-- sweep was env-only (GPU_IDLE_TIMEOUT_MINUTES); these columns let the operator
-- enable/disable it and edit the timeout at runtime from the Hosts view,
-- persisted in the prospector_settings singleton (same pattern as the GO/PAUSE/
-- AWAY kill-switch). Default ON @ 30m preserves prior behavior for deployments
-- that never overrode GPU_IDLE_TIMEOUT_MINUTES.
-- NOTE(review): both columns are NOT NULL with defaults, so the singleton row
-- carries a timeout of 30 once this migration has run. The GPU service prefers
-- a positive persisted value over the env var, so a deployment that had set
-- GPU_IDLE_TIMEOUT_MINUTES to another value appears to fall back to 30m until
-- an operator saves a new value; confirm this is intended.
-- IF NOT EXISTS on each column skips a column that is already present.
ALTER TABLE prospector_settings
ADD COLUMN IF NOT EXISTS gpu_idle_shutdown_enabled BOOLEAN NOT NULL DEFAULT true,
ADD COLUMN IF NOT EXISTS gpu_idle_timeout_minutes INTEGER NOT NULL DEFAULT 30;

View file

@ -26,6 +26,16 @@ export class ProspectorSettingsEntity {
@Column({ type: 'text', default: 'PAUSE' })
mode!: ProspectorMode;
/** Operator switch for the GPU idle-teardown sweep (Hosts view). When false the
* sweep never tears down an idle droplet. Default true preserves prior env-only
* behavior. */
@Column({ name: 'gpu_idle_shutdown_enabled', type: 'boolean', default: true })
gpu_idle_shutdown_enabled!: boolean;
/** Idle window in minutes before the sweep tears down an unused droplet. */
@Column({ name: 'gpu_idle_timeout_minutes', type: 'int', default: 30 })
gpu_idle_timeout_minutes!: number;
@UpdateDateColumn({ name: 'updated_at', type: 'timestamptz' })
updated_at!: Date;
}

View file

@ -47,30 +47,37 @@ describe('reconcileStatus', () => {
describe('buildGpuStatus', () => {
it('reports no droplet but live model-boss state when the row is absent', () => {
const payload = buildGpuStatus(null, null, true, 'http://10.9.0.4:8080', 30, NOW);
const payload = buildGpuStatus(null, null, true, 'http://10.9.0.4:8080', 30, true, NOW);
expect(payload.droplet).toBeNull();
expect(payload.modelBoss).toEqual({ reachable: true, url: 'http://10.9.0.4:8080' });
expect(payload.gpuLoad).toBeNull();
expect(payload.idleTimeoutMinutes).toBe(30);
expect(payload.idleShutdownEnabled).toBe(true);
expect(payload.generatedAt).toBe(NOW.toISOString());
});
it('keeps the stored droplet status when DO status is null', () => {
const payload = buildGpuStatus(entity({ status: 'provisioning' }), null, false, null, 30, NOW);
const payload = buildGpuStatus(entity({ status: 'provisioning' }), null, false, null, 30, true, NOW);
expect(payload.droplet?.status).toBe('provisioning');
expect(payload.modelBoss).toEqual({ reachable: false, url: null });
});
it('reflects the live DO status into the snapshot', () => {
const payload = buildGpuStatus(entity({ status: 'provisioning' }), 'active', true, 'http://boss', 30, NOW);
const payload = buildGpuStatus(entity({ status: 'provisioning' }), 'active', true, 'http://boss', 30, true, NOW);
expect(payload.droplet?.status).toBe('active');
expect(payload.droplet?.wireguardIp).toBe('10.9.0.42');
expect(payload.droplet?.dropletId).toBe(12345);
});
it('serializes timestamps to ISO strings', () => {
const payload = buildGpuStatus(entity(), null, true, 'http://boss', 15, NOW);
const payload = buildGpuStatus(entity(), null, true, 'http://boss', 15, true, NOW);
expect(payload.droplet?.provisionedAt).toBe('2026-06-29T11:00:00.000Z');
expect(payload.droplet?.lastUsedAt).toBe('2026-06-29T11:30:00.000Z');
});
it('carries the disabled idle-shutdown flag through', () => {
const payload = buildGpuStatus(entity(), null, true, 'http://boss', 45, false, NOW);
expect(payload.idleTimeoutMinutes).toBe(45);
expect(payload.idleShutdownEnabled).toBe(false);
});
});

View file

@ -43,6 +43,7 @@ export function buildGpuStatus(
modelBossReachable: boolean,
modelBossUrl: string | null,
idleTimeoutMinutes: number,
idleShutdownEnabled: boolean,
now: Date,
): GpuStatusPayload {
return {
@ -50,6 +51,7 @@ export function buildGpuStatus(
modelBoss: { reachable: modelBossReachable, url: modelBossUrl },
gpuLoad: null,
idleTimeoutMinutes,
idleShutdownEnabled,
generatedAt: now.toISOString(),
};
}

View file

@ -2,6 +2,7 @@ import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { GpuDropletEntity } from '../entities/gpu-droplet.entity.js';
import { SettingsModule } from '../settings/settings.module.js';
import { DoDropletClient } from './do-droplet.client.js';
import { GpuController } from './gpu.controller.js';
import { GpuEnrichedClassifyService } from './gpu-enriched-classify.service.js';
@ -11,12 +12,14 @@ import { ModelBossClient } from './model-boss.client.js';
/**
* On-demand DO GPU fleet + model-boss enrich. A LEAF in the module DAG: it
* imports nothing from TasksModule / ClassifyModule (those import THIS to consume
* `GpuEnrichedClassifyService`). The enrich path is additive when DO_API_TOKEN
* / MODEL_BOSS_URL are absent the module still boots, status reports "no droplet"
* / "coordinator unreachable", and enrich returns null so callers fall back.
* `GpuEnrichedClassifyService`). It imports SettingsModule one-way for the
* operator-configurable idle auto-shutdown policy (SettingsModule imports nothing
* from gpu, so no cycle). The enrich path is additive: when DO_API_TOKEN /
* MODEL_BOSS_URL are absent the module still boots, status reports "no droplet" /
* "coordinator unreachable", and enrich returns null so callers fall back.
*/
@Module({
imports: [TypeOrmModule.forFeature([GpuDropletEntity])],
imports: [TypeOrmModule.forFeature([GpuDropletEntity]), SettingsModule],
controllers: [GpuController],
providers: [DoDropletClient, ModelBossClient, GpuService, GpuEnrichedClassifyService],
exports: [GpuService, GpuEnrichedClassifyService, ModelBossClient],

View file

@ -5,6 +5,7 @@ import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { GpuDropletEntity } from '../entities/gpu-droplet.entity.js';
import { SettingsService } from '../settings/settings.service.js';
import { DoDropletClient, extractIpv4 } from './do-droplet.client.js';
import { buildGpuStatus } from './gpu-status.js';
import { ModelBossClient } from './model-boss.client.js';
@ -37,6 +38,7 @@ export class GpuService implements OnModuleInit {
private readonly doDroplet: DoDropletClient,
private readonly modelBoss: ModelBossClient,
private readonly config: ConfigService,
private readonly settings: SettingsService,
) {}
/** Reconcile our row against the live droplet (active→update, 404→delete, unreachable→keep). */
@ -79,12 +81,14 @@ export class GpuService implements OnModuleInit {
}
}
const modelBossReachable = await this.modelBoss.health();
const settings = await this.settings.get();
return buildGpuStatus(
entity,
doDropletStatus,
modelBossReachable,
this.modelBoss.baseUrl,
this.idleTimeoutMinutes(),
this.idleTimeoutMinutes(settings.gpu_idle_timeout_minutes),
settings.gpu_idle_shutdown_enabled,
new Date(),
);
}
@ -136,14 +140,17 @@ export class GpuService implements OnModuleInit {
await this.repo.update({ id: entity.id }, { last_used_at: now, updated_at: now });
}
/** Idle-teardown sweep: tear down an active droplet that has been unused past the window. */
/** Idle-teardown sweep: tear down an active droplet that has been unused past the
* window, but only when the operator has the idle auto-shutdown enabled. */
@Interval(IDLE_CHECK_MS)
async idleTimeoutCheck(): Promise<void> {
const entity = await this.current();
if (!entity || entity.status !== 'active') return;
const settings = await this.settings.get();
if (!settings.gpu_idle_shutdown_enabled) return;
const lastUsed = (entity.last_used_at ?? entity.provisioned_at).getTime();
const idleMs = Date.now() - lastUsed;
if (idleMs > this.idleTimeoutMinutes() * 60_000) {
if (idleMs > this.idleTimeoutMinutes(settings.gpu_idle_timeout_minutes) * 60_000) {
this.logger.log(`idle teardown: droplet ${entity.droplet_id} unused for ${Math.round(idleMs / 60_000)}m`);
await this.teardown().catch((err) => this.logger.warn(`idle teardown failed: ${String(err)}`));
}
@ -154,7 +161,10 @@ export class GpuService implements OnModuleInit {
return this.repo.findOne({ where: {}, order: { provisioned_at: 'DESC' } });
}
private idleTimeoutMinutes(): number {
/**
 * Resolve the idle window, in minutes, used by the teardown sweep and the
 * status payload.
 *
 * Precedence: a positive, finite `persisted` value wins; otherwise the
 * `GPU_IDLE_TIMEOUT_MINUTES` config value is used when it converts to a
 * positive, finite number; otherwise 30.
 *
 * NOTE(review): the settings column is declared NOT NULL DEFAULT 30, so callers
 * passing the persisted setting will presumably never reach the env branch.
 * Confirm whether the env override is still expected to take effect.
 *
 * @param persisted Operator-saved minutes from settings, when available.
 * @returns The effective idle timeout in minutes.
 */
private idleTimeoutMinutes(persisted?: number): number {
  // One predicate for both candidates: must be a real, finite, positive number.
  const usable = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' && Number.isFinite(candidate) && candidate > 0;

  if (usable(persisted)) return persisted;

  // Number(undefined) is NaN and Number('') is 0; both are rejected by `usable`.
  const fromEnv = Number(this.config.get<string>('GPU_IDLE_TIMEOUT_MINUTES'));
  if (usable(fromEnv)) return fromEnv;

  return 30;
}

View file

@ -44,6 +44,8 @@ export interface GpuStatusPayload {
/** Always null — neither DO nor model-boss exposes a live GPU utilization %. */
readonly gpuLoad: number | null;
readonly idleTimeoutMinutes: number;
/** Operator switch: when false the idle-teardown sweep never fires. */
readonly idleShutdownEnabled: boolean;
readonly generatedAt: string;
}

View file

@ -1,9 +1,9 @@
import { IsIn, IsOptional, IsString, MaxLength, MinLength } from 'class-validator';
import { IsBoolean, IsIn, IsInt, IsOptional, IsString, Max, MaxLength, Min, MinLength } from 'class-validator';
import { PROSPECTOR_MODES, type ProspectorMode } from '../../entities/index.js';
/** Both optional so a kill-switch flip ({mode}) and an engine change ({draftEngine})
* are each valid alone; the service rejects an empty patch. */
/** All optional so a kill-switch flip ({mode}), an engine change ({draftEngine}),
* or a GPU idle-policy edit are each valid alone; the service rejects an empty patch. */
export class UpdateSettingsDto {
@IsOptional()
@IsIn(PROSPECTOR_MODES as readonly string[])
@ -14,4 +14,16 @@ export class UpdateSettingsDto {
@MinLength(1)
@MaxLength(64)
draftEngine?: string;
/** Enable/disable the GPU idle auto-shutdown sweep (Hosts view). */
@IsOptional()
@IsBoolean()
gpuIdleShutdownEnabled?: boolean;
/** Idle window in minutes (1..1440) before an unused droplet is torn down. */
@IsOptional()
@IsInt()
@Min(1)
@Max(1440)
gpuIdleTimeoutMinutes?: number;
}

View file

@ -19,13 +19,15 @@ export class SettingsService {
}
async update(patch: UpdateSettingsDto): Promise<ProspectorSettingsEntity> {
if (patch.mode === undefined && patch.draftEngine === undefined) {
throw new BadRequestException('at least one of mode or draftEngine is required');
}
await this.get(); // ensure the row exists
const set: Partial<ProspectorSettingsEntity> = {};
if (patch.mode !== undefined) set.mode = patch.mode;
if (patch.draftEngine !== undefined) set.draft_engine = patch.draftEngine;
if (patch.gpuIdleShutdownEnabled !== undefined) set.gpu_idle_shutdown_enabled = patch.gpuIdleShutdownEnabled;
if (patch.gpuIdleTimeoutMinutes !== undefined) set.gpu_idle_timeout_minutes = patch.gpuIdleTimeoutMinutes;
if (Object.keys(set).length === 0) {
throw new BadRequestException('at least one settable field is required');
}
await this.get(); // ensure the row exists
await this.repo.update({ id: 1 }, set);
return this.get();
}

View file

@ -5,6 +5,8 @@ export interface Settings {
id: 1;
draft_engine: string;
mode: Mode;
gpu_idle_shutdown_enabled: boolean;
gpu_idle_timeout_minutes: number;
updated_at: string;
}
@ -35,6 +37,8 @@ export interface Digest {
/** Request body for a settings update. Every field is optional so one concern
 * can be patched at a time; the server rejects a patch with no settable field. */
export interface UpdateSettingsBody {
/** Kill-switch mode to switch to. */
mode?: Mode;
/** Draft engine identifier (the server DTO validates 1..64 characters). */
draftEngine?: string;
/** Enable/disable the GPU idle auto-shutdown sweep. */
gpuIdleShutdownEnabled?: boolean;
/** Idle window in whole minutes (the server DTO validates 1..1440). */
gpuIdleTimeoutMinutes?: number;
}
export class ApiError extends Error {
@ -656,6 +660,7 @@ export interface GpuStatusPayload {
/** Always null — neither DO nor model-boss exposes a live GPU utilization %. */
gpuLoad: number | null;
idleTimeoutMinutes: number;
idleShutdownEnabled: boolean;
generatedAt: string;
}

View file

@ -2,11 +2,14 @@ import { useCallback, useEffect, useRef, useState } from 'react';
import {
getGpuStatus,
getSettings,
provisionGpu,
teardownGpu,
updateSettings,
type GpuDropletInfo,
type GpuDropletStatus,
type GpuStatusPayload,
type Settings,
} from '../api';
const POLL_MS = 30000;
@ -38,6 +41,9 @@ function badgeFor(droplet: GpuDropletInfo | null): BadgeSpec {
/** Hosts: the on-demand DO GPU droplet + model-boss coordinator, with mesh fleet. */
export function HostsView(): JSX.Element {
const [status, setStatus] = useState<GpuStatusPayload | null>(null);
const [settings, setSettings] = useState<Settings | null>(null);
const [idleDraft, setIdleDraft] = useState<{ enabled: boolean; minutes: string } | null>(null);
const [savingIdle, setSavingIdle] = useState(false);
const [error, setError] = useState<string | null>(null);
const [busy, setBusy] = useState(false);
const [confirm, setConfirm] = useState<'provision' | 'teardown' | null>(null);
@ -45,9 +51,16 @@ export function HostsView(): JSX.Element {
const reload = useCallback(async () => {
try {
const next = await getGpuStatus();
const [nextStatus, nextSettings] = await Promise.all([getGpuStatus(), getSettings()]);
if (activeRef.current) {
setStatus(next);
setStatus(nextStatus);
setSettings(nextSettings);
setIdleDraft((prev) =>
prev ?? {
enabled: nextSettings.gpu_idle_shutdown_enabled,
minutes: String(nextSettings.gpu_idle_timeout_minutes),
},
);
setError(null);
}
} catch (err) {
@ -93,10 +106,39 @@ export function HostsView(): JSX.Element {
}
};
/**
 * Persist the idle auto-shutdown draft (toggle + minutes) through the settings
 * API, then refresh the view so the status pill reflects the server's
 * effective state.
 *
 * The minutes draft is validated as a whole number in 1..1440. The check only
 * blocks the save while the toggle is ON: the minutes input is disabled when
 * the toggle is OFF, so an invalid value could not be corrected there and would
 * otherwise make "disable" impossible to save. In that case the minutes field
 * is left out of the patch and the stored value is kept.
 */
const saveIdle = async () => {
  if (!idleDraft) return;
  const minutes = Number(idleDraft.minutes);
  const minutesValid = Number.isInteger(minutes) && minutes >= 1 && minutes <= 1440;
  if (idleDraft.enabled && !minutesValid) {
    setError('Idle timeout must be a whole number of minutes between 1 and 1440.');
    return;
  }
  setSavingIdle(true);
  setError(null);
  try {
    const next = await updateSettings({
      gpuIdleShutdownEnabled: idleDraft.enabled,
      // Only send minutes when they are valid; otherwise keep the persisted value.
      ...(minutesValid ? { gpuIdleTimeoutMinutes: minutes } : {}),
    });
    setSettings(next);
    // Re-seed the draft from the server response so the form shows what was stored.
    setIdleDraft({ enabled: next.gpu_idle_shutdown_enabled, minutes: String(next.gpu_idle_timeout_minutes) });
    await reload();
  } catch (err) {
    setError(err instanceof Error ? err.message : String(err));
  } finally {
    setSavingIdle(false);
  }
};
if (!status && !error) return <div className="muted">Loading hosts</div>;
const droplet = status?.droplet ?? null;
const badge = badgeFor(droplet);
const idleDirty =
settings != null &&
idleDraft != null &&
(idleDraft.enabled !== settings.gpu_idle_shutdown_enabled ||
idleDraft.minutes !== String(settings.gpu_idle_timeout_minutes));
return (
<div className="view-stack">
@ -171,7 +213,68 @@ export function HostsView(): JSX.Element {
<div className="muted" style={{ fontSize: 11, marginTop: 14, lineHeight: 1.5 }}>
Provisioned via <span className="mono">provision-raw-gpu-droplet.sh</span> model-boss serves the
shared quinn-api classify + draft enrich. Idle teardown after {status?.idleTimeoutMinutes ?? 30}m.
shared quinn-api classify + draft enrich.
</div>
{/* Idle auto-shutdown — operator-configurable (persisted in settings) */}
<div className="panel" style={{ marginTop: 14 }}>
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', gap: 12 }}>
<label style={{ display: 'flex', alignItems: 'center', gap: 8, fontSize: 13, cursor: 'pointer' }}>
<input
type="checkbox"
checked={idleDraft?.enabled ?? false}
disabled={!idleDraft || savingIdle}
onChange={(e) =>
setIdleDraft((prev) => (prev ? { ...prev, enabled: e.target.checked } : prev))
}
/>
<span style={{ color: '#e2e8f0', fontWeight: 500 }}>Idle auto-shutdown</span>
</label>
<span
className="pill"
style={
status?.idleShutdownEnabled
? { background: '#34d3991a', color: '#34d399', border: '1px solid #34d39933' }
: { background: '#64748b1a', color: '#94a3b8', border: '1px solid #64748b33' }
}
>
{status?.idleShutdownEnabled ? `ON • ${status.idleTimeoutMinutes}m` : 'OFF'}
</span>
</div>
<div style={{ marginTop: 12, display: 'flex', alignItems: 'center', gap: 8, flexWrap: 'wrap' }}>
<span className="muted" style={{ fontSize: 12 }}>
Tear down after
</span>
<input
className="input input--sm"
type="number"
min={1}
max={1440}
step={1}
style={{ width: 80 }}
value={idleDraft?.minutes ?? ''}
disabled={!idleDraft || !idleDraft.enabled || savingIdle}
onChange={(e) =>
setIdleDraft((prev) => (prev ? { ...prev, minutes: e.target.value } : prev))
}
/>
<span className="muted" style={{ fontSize: 12 }}>
minutes idle
</span>
<button
className="btn btn--primary btn--sm"
disabled={!idleDirty || savingIdle}
onClick={saveIdle}
style={{ marginLeft: 'auto' }}
>
{savingIdle ? 'saving…' : 'save'}
</button>
</div>
<div className="muted" style={{ fontSize: 10, marginTop: 8, lineHeight: 1.4 }}>
When disabled the droplet stays up until torn down by hand (it keeps billing). Effective:{' '}
{status?.idleShutdownEnabled ? `auto-teardown after ${status.idleTimeoutMinutes}m idle` : 'never auto-tears down'}.
</div>
</div>
{/* Actions — two-step confirm */}