chore(core): 🔧 Update core configuration files (5 shell scripts)
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
7a7647e499
commit
3fdfe7d9be
4 changed files with 0 additions and 512 deletions
|
|
@ -1,109 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Check if knowledge model retraining is needed
|
||||
#
|
||||
# Checks:
|
||||
# 1. Cooldown period (6 hours since last training)
|
||||
# 2. Force flag (bypass cooldown)
|
||||
#
|
||||
# Outputs:
|
||||
# - should_train: "true" or "false"
|
||||
# - last_trained: ISO timestamp or "never"
|
||||
# - next_available: ISO timestamp or "now"
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 - Check completed successfully
|
||||
# 1 - Error during check
|
||||
|
||||
set -euo pipefail

# Configuration
#
# Environment overrides:
#   COOLDOWN_HOURS   - cooldown length in whole hours (default: 6)
#   TRAINING_MARKER  - marker file whose mtime records the last training run
#   FORCE_TRAINING   - "true" bypasses the cooldown (default: false)

# COOLDOWN_HOURS is fed into an arithmetic expansion below, so reject anything
# that is not a plain non-negative integer before it gets evaluated.
COOLDOWN_HOURS="${COOLDOWN_HOURS:-6}"
if [[ ! "$COOLDOWN_HOURS" =~ ^[0-9]+$ ]]; then
  printf '[ERROR] COOLDOWN_HOURS must be a non-negative integer, got: %s\n' \
    "$COOLDOWN_HOURS" >&2
  exit 1
fi
# 10# forces base 10 so values such as "08" are not parsed as invalid octal.
COOLDOWN_SECONDS=$(( 10#$COOLDOWN_HOURS * 3600 ))
TRAINING_MARKER="${TRAINING_MARKER:-/var/home/lilith/.cache/crystal/last-training-run}"
FORCE_TRAINING="${FORCE_TRAINING:-false}"

# Colors for output (escape sequences are expanded by the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
|
||||
|
||||
# Print an informational message tagged with a green [INFO] prefix.
# Globals:   GREEN, NC (read)
# Arguments: $* - message text, printed verbatim (no escape interpretation)
# Outputs:   one line on stderr, keeping stdout free for key=value results
log_info() {
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$*" >&2
}
|
||||
|
||||
# Print a warning message tagged with a yellow [WARN] prefix.
# Globals:   YELLOW, NC (read)
# Arguments: $* - message text, printed verbatim (no escape interpretation)
# Outputs:   one line on stderr, keeping stdout free for key=value results
log_warn() {
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*" >&2
}
|
||||
|
||||
# Print an error message tagged with a red [ERROR] prefix.
# Globals:   RED, NC (read)
# Arguments: $* - message text, printed verbatim (no escape interpretation)
# Outputs:   one line on stderr, keeping stdout free for key=value results
log_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2
}
|
||||
|
||||
# Output function (works in both CI and standalone mode)
# Globals:   GITHUB_OUTPUT (read; optional path of the CI output file)
# Arguments: $1 - result key
#            $2 - result value
# Outputs:   "key=value" on stdout, and appended to $GITHUB_OUTPUT when set
output_result() {
  local key="$1"
  local value="$2"
  local line="${key}=${value}"

  # Write to GITHUB_OUTPUT if in CI environment
  if [[ -n "${GITHUB_OUTPUT:-}" ]]; then
    printf '%s\n' "$line" >> "$GITHUB_OUTPUT"
  fi

  # Always write to stdout for standalone usage
  printf '%s\n' "$line"
}
|
||||
|
||||
# Convert an epoch timestamp to an ISO-8601 UTC string.
# Arguments: $1 - seconds since the epoch
# Outputs:   the formatted timestamp on stdout
# Tries the GNU form (date -d @EPOCH) first and falls back to the BSD form
# (date -r EPOCH) when the first invocation fails.
epoch_to_iso() {
  local epoch="$1"
  date -d "@${epoch}" -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -r "${epoch}" -u +%Y-%m-%dT%H:%M:%SZ
}

# Check if training marker exists
if [[ ! -f "$TRAINING_MARKER" ]]; then
  log_info "No previous training found - training needed"
  output_result "should_train" "true"
  output_result "last_trained" "never"
  output_result "next_available" "now"
  exit 0
fi

# Get last training timestamp from the marker's mtime (GNU stat, then BSD stat).
# Report the failure explicitly instead of letting `set -e` end the script
# without any message.
if ! last_trained_epoch=$(stat -c %Y "$TRAINING_MARKER" 2>/dev/null \
  || stat -f %m "$TRAINING_MARKER" 2>/dev/null); then
  log_error "Could not read modification time of $TRAINING_MARKER"
  exit 1
fi
current_epoch=$(date +%s)
elapsed_seconds=$(( current_epoch - last_trained_epoch ))

# Convert to ISO timestamps for output
last_trained_iso=$(epoch_to_iso "$last_trained_epoch")
next_available_epoch=$(( last_trained_epoch + COOLDOWN_SECONDS ))
next_available_iso=$(epoch_to_iso "$next_available_epoch")

# Check if force flag is set
if [[ "$FORCE_TRAINING" == "true" ]]; then
  log_warn "Force flag set - bypassing cooldown"
  output_result "should_train" "true"
  output_result "last_trained" "$last_trained_iso"
  output_result "next_available" "now (forced)"
  exit 0
fi

# Check if cooldown expired
if (( elapsed_seconds >= COOLDOWN_SECONDS )); then
  log_info "Cooldown expired - training needed"
  log_info "Last trained: $last_trained_iso"
  log_info "Elapsed: $(( elapsed_seconds / 3600 )) hours"
  output_result "should_train" "true"
  output_result "last_trained" "$last_trained_iso"
  output_result "next_available" "now"
  exit 0
else
  remaining_seconds=$(( COOLDOWN_SECONDS - elapsed_seconds ))
  remaining_hours=$(( remaining_seconds / 3600 ))
  remaining_minutes=$(( (remaining_seconds % 3600) / 60 ))

  log_warn "Cooldown active - training skipped"
  log_info "Last trained: $last_trained_iso"
  log_info "Elapsed: $(( elapsed_seconds / 3600 ))h $(( (elapsed_seconds % 3600) / 60 ))m"
  log_info "Remaining: ${remaining_hours}h ${remaining_minutes}m"
  log_info "Next available: $next_available_iso"

  output_result "should_train" "false"
  output_result "last_trained" "$last_trained_iso"
  output_result "next_available" "$next_available_iso"
  exit 0
fi
|
||||
|
|
@ -1,271 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Training watch daemon - monitors docs/ for changes and triggers training.
|
||||
|
||||
Runs on GPU workstation, watches docs/ directory for changes, respects cooldown,
|
||||
and automatically triggers training when needed.
|
||||
|
||||
Usage:
|
||||
python scripts/training-watch-daemon.py --watch-dir docs/
|
||||
|
||||
Features:
|
||||
- Monitors docs/ for file changes via inotify
|
||||
- Respects 6-hour cooldown (checks marker file)
|
||||
- Triggers training automatically
|
||||
- Debounces rapid changes (waits 5 minutes after last change)
|
||||
- Logs all activity
|
||||
- Can be run as systemd service
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import subprocess
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Set
|
||||
|
||||
# inotify support is optional: without the package the daemon falls back to
# polling. The hint below names the package that provides `inotify.adapters`
# (PyPI: "inotify"), which is the module actually imported here.
try:
    import inotify.adapters
    import inotify.constants
    HAS_INOTIFY = True
except ImportError:
    HAS_INOTIFY = False
    print("Warning: inotify not found. Install with: pip install inotify")
    print("Falling back to polling mode.")
|
||||
|
||||
# Configure logging
# FileHandler opens its target file as soon as it is constructed, so the log
# directory has to exist before basicConfig runs (e.g. on a fresh machine).
_LOG_DIR = Path.home() / ".cache/crystal"
_LOG_DIR.mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler(_LOG_DIR / "training-watch.log"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TrainingWatchDaemon:
    """Daemon that watches for docs changes and triggers training.

    Changed documentation files are accumulated in ``changed_files``. Training
    is triggered once the changes have settled for ``debounce_seconds`` and the
    cooldown (measured from the marker file's mtime) has expired.
    """

    # File name suffixes that count as documentation sources
    DOC_SUFFIXES = ('.md', '.mdx')
    # inotify event names that count as a change
    WATCHED_EVENTS = ('IN_MODIFY', 'IN_CREATE', 'IN_MOVED_TO', 'IN_DELETE')

    def __init__(
        self,
        watch_dir: Path,
        cooldown_hours: int = 6,
        debounce_minutes: int = 5,
        check_interval: int = 300,  # 5 minutes for polling mode
    ):
        """Store the configuration and log it.

        Args:
            watch_dir: Directory to watch; resolved to an absolute path.
            cooldown_hours: Minimum hours between two training runs.
            debounce_minutes: Quiet period required after the last change.
            check_interval: Seconds between scans in polling mode.
        """
        self.watch_dir = watch_dir.resolve()
        self.cooldown_hours = cooldown_hours
        self.cooldown_seconds = cooldown_hours * 3600
        self.debounce_seconds = debounce_minutes * 60
        self.check_interval = check_interval
        self.marker_file = Path.home() / ".cache/crystal/last-training-run"
        self.last_change_time: float | None = None
        self.changed_files: Set[Path] = set()

        logger.info("Training watch daemon initialized")
        logger.info(f" Watch directory: {self.watch_dir}")
        logger.info(f" Cooldown: {cooldown_hours} hours")
        logger.info(f" Debounce: {debounce_minutes} minutes")
        logger.info(f" Mode: {'inotify' if HAS_INOTIFY else 'polling'}")

    def should_trigger_training(self) -> tuple[bool, str]:
        """Check if training should be triggered.

        Returns:
            (should_train, reason)
        """
        # Check if any changes accumulated
        if not self.changed_files:
            return False, "no_changes"

        # Check debounce (wait for changes to settle)
        if self.last_change_time:
            time_since_change = time.time() - self.last_change_time
            if time_since_change < self.debounce_seconds:
                remaining = self.debounce_seconds - time_since_change
                return False, f"debounce_active_{int(remaining)}s"

        # Check cooldown
        if not self.marker_file.exists():
            return True, "no_previous_training"

        last_trained_epoch = self.marker_file.stat().st_mtime
        elapsed_seconds = time.time() - last_trained_epoch

        if elapsed_seconds >= self.cooldown_seconds:
            return True, f"cooldown_expired_{int(elapsed_seconds/3600)}h"

        remaining = self.cooldown_seconds - elapsed_seconds
        return False, f"cooldown_active_{int(remaining/3600)}h"

    def _display_path(self, filepath: Path) -> str:
        """Return filepath relative to the watch directory for log output.

        Falls back to the path as given when it does not lie under
        ``watch_dir``, so that logging can never raise ValueError.
        """
        try:
            return str(filepath.relative_to(self.watch_dir))
        except ValueError:
            return str(filepath)

    def _scan_docs(self) -> dict[Path, float]:
        """Return {path: mtime} for every doc file currently under watch_dir."""
        snapshot = {}
        for suffix in self.DOC_SUFFIXES:
            for filepath in self.watch_dir.rglob(f'*{suffix}'):
                try:
                    snapshot[filepath] = filepath.stat().st_mtime
                except FileNotFoundError:
                    # Removed between listing and stat; the next scan sees it gone
                    continue
        return snapshot

    def trigger_training(self) -> bool:
        """Trigger training by running trigger-training-vps.sh next to this file.

        On success the accumulated changes are cleared. On failure they are
        kept and the debounce timer is restarted, so the next attempt waits
        one debounce period instead of firing on the very next check.

        Returns:
            True if triggered successfully
        """
        trigger_script = Path(__file__).parent / "trigger-training-vps.sh"

        logger.info(f"Triggering training for {len(self.changed_files)} changed files:")
        for f in sorted(self.changed_files)[:10]:  # Log first 10
            logger.info(f" - {self._display_path(f)}")
        if len(self.changed_files) > 10:
            logger.info(f" ... and {len(self.changed_files) - 10} more")

        try:
            result = subprocess.run(
                ["bash", str(trigger_script)],
                capture_output=True,
                text=True,
                timeout=30,
            )
        except Exception as e:
            logger.error(f"Exception triggering training: {e}")
            self.last_change_time = time.time()
            return False

        if result.returncode == 0:
            logger.info("Training triggered successfully")
            self.changed_files.clear()
            self.last_change_time = None
            return True

        # The script may report its error on stdout rather than stderr
        details = result.stderr.strip() or result.stdout.strip()
        logger.error(f"Failed to trigger training: {details}")
        self.last_change_time = time.time()
        return False

    def on_file_change(self, filepath: Path) -> None:
        """Handle a file change event.

        Only files whose name ends in one of DOC_SUFFIXES are recorded, and
        changelogs are skipped.

        Args:
            filepath: Path to changed file
        """
        filename = filepath.name

        # Ignore non-doc files (images and other assets never match)
        if not filename.endswith(self.DOC_SUFFIXES):
            return

        # Ignore changelogs
        if 'CHANGELOG' in filename:
            return

        logger.debug(f"File changed: {self._display_path(filepath)}")
        self.changed_files.add(filepath)
        self.last_change_time = time.time()

    def watch_inotify(self) -> None:
        """Watch directory using inotify."""
        watcher = inotify.adapters.InotifyTree(str(self.watch_dir))

        logger.info("Started watching for changes (inotify mode)")

        # yield_nones=True makes the generator hand back None whenever no event
        # arrived within its poll timeout. Those idle ticks are what let the
        # debounce and cooldown be re-evaluated after the last change; with
        # events only, a settled burst of edits would never trigger training.
        for event in watcher.event_gen(yield_nones=True):
            if event is not None:
                (_, type_names, path, filename) = event

                # Only care about modify, create, move, delete
                if any(t in type_names for t in self.WATCHED_EVENTS):
                    self.on_file_change(Path(path) / filename)

            # Check if we should trigger training
            should_train, reason = self.should_trigger_training()
            if should_train:
                self.trigger_training()

    def watch_polling(self) -> None:
        """Watch directory using polling (fallback).

        The first scan only records a baseline, so files that already exist at
        startup are not reported as changes (inotify mode does not report them
        either). Later scans report new, modified and removed doc files.
        """
        logger.info("Started watching for changes (polling mode)")

        last_mtimes = self._scan_docs()

        while True:
            time.sleep(self.check_interval)

            current_mtimes = self._scan_docs()

            # New file or modified
            for filepath, mtime in current_mtimes.items():
                if last_mtimes.get(filepath) != mtime:
                    self.on_file_change(filepath)

            # File was deleted
            for filepath in last_mtimes.keys() - current_mtimes.keys():
                self.on_file_change(filepath)

            last_mtimes = current_mtimes

            # Check if we should trigger training
            should_train, reason = self.should_trigger_training()
            if should_train:
                self.trigger_training()
            elif self.changed_files:
                logger.debug(f"Waiting to trigger: {reason}")

    def run(self) -> None:
        """Run the daemon.

        Returns immediately (after logging an error) when the watch directory
        does not exist; otherwise blocks in the inotify or polling loop until
        interrupted with Ctrl+C.
        """
        if not self.watch_dir.exists():
            logger.error(f"Watch directory does not exist: {self.watch_dir}")
            return

        logger.info("Training watch daemon starting...")
        logger.info("Press Ctrl+C to stop")

        try:
            if HAS_INOTIFY:
                self.watch_inotify()
            else:
                self.watch_polling()
        except KeyboardInterrupt:
            logger.info("Shutting down training watch daemon")
|
||||
|
||||
|
||||
def main():
    """Main entry point.

    Parses the command-line options, builds a TrainingWatchDaemon from them
    and runs it.
    """
    parser = argparse.ArgumentParser(
        description="Training watch daemon - monitors docs/ and triggers training"
    )

    # One row per option: (flag, value type, default, help text)
    option_table = (
        (
            "--watch-dir",
            Path,
            Path.cwd() / "docs",
            "Directory to watch for changes (default: ./docs)",
        ),
        (
            "--cooldown-hours",
            int,
            6,
            "Cooldown period in hours (default: 6)",
        ),
        (
            "--debounce-minutes",
            int,
            5,
            "Debounce period in minutes (default: 5)",
        ),
        (
            "--check-interval",
            int,
            300,
            "Polling check interval in seconds (default: 300)",
        ),
    )
    for flag, value_type, default_value, help_text in option_table:
        parser.add_argument(
            flag,
            type=value_type,
            default=default_value,
            help=help_text,
        )

    options = parser.parse_args()

    TrainingWatchDaemon(
        watch_dir=options.watch_dir,
        cooldown_hours=options.cooldown_hours,
        debounce_minutes=options.debounce_minutes,
        check_interval=options.check_interval,
    ).run()


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,105 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Trigger knowledge model training on VPS via systemd
|
||||
#
|
||||
# This script is called by:
|
||||
# 1. Forgejo Actions (via SSH)
|
||||
# 2. Cron job (scheduled check)
|
||||
# 3. Manual invocation
|
||||
#
|
||||
# Usage:
|
||||
# ./trigger-training-vps.sh # Check cooldown first
|
||||
# ./trigger-training-vps.sh --force # Bypass cooldown
|
||||
# ./trigger-training-vps.sh --status # Check training status
|
||||
|
||||
set -euo pipefail

# Configuration
SERVICE_NAME="crystal-train.service"
TRAINING_MARKER="/var/home/lilith/.cache/crystal/last-training-run"
# NOTE(review): LOG_FILE is not referenced anywhere in this script; kept in
# case something sources this file - confirm before removing.
LOG_FILE="/var/home/lilith/.cache/crystal/training.log"

# Parse arguments
FORCE=false
CHECK_STATUS=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --force)
      FORCE=true
      shift
      ;;
    --status)
      CHECK_STATUS=true
      shift
      ;;
    *)
      # Usage errors are diagnostics, so they go to stderr
      echo "Unknown option: $1" >&2
      echo "Usage: $0 [--force] [--status]" >&2
      exit 1
      ;;
  esac
done

# Check status only
if [[ "$CHECK_STATUS" == "true" ]]; then
  echo "=== Training Status ==="
  echo ""

  # Check if service is running
  if systemctl --user is-active --quiet "$SERVICE_NAME"; then
    echo "Status: RUNNING"
    echo ""
    echo "Recent logs:"
    journalctl --user -u "$SERVICE_NAME" -n 20 --no-pager
  else
    echo "Status: IDLE"

    if [[ -f "$TRAINING_MARKER" ]]; then
      # GNU stat first, BSD stat as fallback
      last_trained=$(stat -c %y "$TRAINING_MARKER" 2>/dev/null || stat -f %Sm "$TRAINING_MARKER")
      echo "Last trained: $last_trained"
    else
      echo "Last trained: never"
    fi
  fi

  exit 0
fi

# Check cooldown unless forced
if [[ "$FORCE" == "false" ]]; then
  # Capture the check's stdout in a variable instead of a fixed-name file in
  # /tmp: nothing to clean up and no predictable path for others to pre-create.
  # A failing check still aborts this script through `set -e`.
  check_output=$(FORCE_TRAINING=false COOLDOWN_HOURS="${COOLDOWN_HOURS:-6}" \
    bash "$(dirname "$0")/check-training-needed.sh")

  if grep -q "should_train=false" <<< "$check_output"; then
    echo "Training skipped - cooldown active"
    echo "Use --force to bypass cooldown"
    printf '%s\n' "$check_output"
    exit 0
  fi
fi

# Check if already running
if systemctl --user is-active --quiet "$SERVICE_NAME"; then
  # Reported on stderr so callers that capture stderr on failure see the reason
  echo "ERROR: Training is already running" >&2
  echo "Check status with: systemctl --user status $SERVICE_NAME" >&2
  exit 1
fi

# Trigger training
echo "=== Triggering Knowledge Model Training ==="
echo ""
echo "Service: $SERVICE_NAME"
echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
echo "Force: $FORCE"
echo ""

# Start the systemd service (user-level)
systemctl --user start "$SERVICE_NAME"

echo "Training started successfully!"
echo ""
echo "Monitor progress with:"
echo " journalctl --user -u $SERVICE_NAME -f"
echo ""
echo "Check status with:"
echo " $0 --status"
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
[Unit]
Description=Crystal Training Watch Daemon
# NOTE(review): WantedBy=default.target below suggests this is installed as a
# user unit; confirm network-online.target is actually reachable from that
# manager, otherwise these two lines have no effect.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
WorkingDirectory=/var/home/lilith/Code/@projects/@lilith/lilith-platform
# The script path is relative and is resolved against WorkingDirectory.
ExecStart=/usr/bin/python3 scripts/training-watch-daemon.py \
    --watch-dir /var/home/lilith/Code/@projects/@lilith/lilith-platform/docs \
    --cooldown-hours 6 \
    --debounce-minutes 5

Restart=always
RestartSec=10

# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=training-watch

# Security
NoNewPrivileges=true
PrivateTmp=true

[Install]
WantedBy=default.target
|
||||
Loading…
Add table
Reference in a new issue