platform-tooling/scripts/database/backup-databases.sh

405 lines
12 KiB
Bash
Raw Normal View History

#!/bin/bash
set -euo pipefail
#
# Database Backup Script - lilith-platform
#
# Creates backups of all database services with retention policy
#
# Backup Strategy:
# - PostgreSQL: pg_dump with compression
# - Redis: RDB snapshot
# - Retention: 7 daily, 4 weekly, 3 monthly backups
#
# Usage:
# ./backup-databases.sh [OPTIONS]
#
# Options:
# --service SERVICE Service to back up (postgres, redis, all) [default: all]
# --output DIR Custom backup directory
# --no-cleanup Skip cleanup of old backups
# --dry-run Show what would be done without executing
# --help Show this help message
#
# =============================================================================
# INITIALIZATION
# =============================================================================
# Resolve the directory this script lives in; the shared library directory
# is addressed relative to it and exported under SCRIPT_LIB_DIR.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SCRIPT_LIB_DIR="${SCRIPT_DIR}/../lib"
export SCRIPT_LIB_DIR

# Shared helpers (colors, logging) followed by the database configuration
. "${SCRIPT_LIB_DIR}/colors.sh"
. "${SCRIPT_LIB_DIR}/logger.sh"
. "${SCRIPT_DIR}/database-config.sh"

# Start the logger under this script's tag
log_init "DB-BACKUP"
# Parse command line arguments
TARGET_SERVICE="all"
CUSTOM_OUTPUT_DIR=""
SKIP_CLEANUP=false
DRY_RUN=false

#######################################
# Parse command-line options into the option globals declared above.
# Globals:
#   TARGET_SERVICE, CUSTOM_OUTPUT_DIR, SKIP_CLEANUP, DRY_RUN (written)
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   --help prints the usage header from the top of this file to stdout;
#   problems are reported through log_error.
# Returns:
#   0 on success. Exits the script with 0 after --help, or with 1 on an
#   unknown option, a missing option value, or an unsupported service name.
#######################################
parse_backup_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --service | --output)
        # Check explicitly: with `set -u` active a bare "$2" aborts with an
        # opaque "unbound variable" message when the value is missing.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires an argument"
          exit 1
        fi
        if [[ "$1" == "--service" ]]; then
          TARGET_SERVICE="$2"
        else
          CUSTOM_OUTPUT_DIR="$2"
        fi
        shift 2
        ;;
      --no-cleanup)
        SKIP_CLEANUP=true
        shift
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --help | -h)
        # Print only the leading usage header: skip the shebang and stop at
        # the first "# =====" section banner so internal comments further
        # down the file do not leak into the help text.
        awk '/^#!/ { next } /^# =====/ { exit } /^#/ { sub(/^# ?/, ""); print }' "$0"
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        exit 1
        ;;
    esac
  done

  # Reject unsupported services up front; otherwise no backup branch would
  # match later and the run would report success without backing up anything.
  case "$TARGET_SERVICE" in
    all | postgres | redis) ;;
    *)
      log_error "Invalid service: $TARGET_SERVICE (expected: postgres, redis, all)"
      exit 1
      ;;
  esac
}

parse_backup_args "$@"
# Backup directory setup: an explicit --output directory wins over the
# configured BACKUP_BASE_DIR.
BACKUP_DIR="${CUSTOM_OUTPUT_DIR:-$BACKUP_BASE_DIR}"
# Read the clock once and derive the date from that reading, so the two
# values cannot disagree when the script starts right around midnight.
BACKUP_TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
BACKUP_DATE="${BACKUP_TIMESTAMP%%_*}"
# =============================================================================
# BACKUP FUNCTIONS
# =============================================================================
#######################################
# Dump the PostgreSQL database from its container into a gzip-compressed
# file and point the 'latest.sql.gz' symlink at it.
# Globals:
#   BACKUP_DIR, BACKUP_TIMESTAMP, DRY_RUN, POSTGRES_USER, POSTGRES_DB (read)
# Arguments:
#   None
# Outputs:
#   Progress and errors through the log_* helpers.
# Returns:
#   0 on success (or in dry-run mode), 1 if the container is not running or
#   the dump fails.
#######################################
backup_postgres() {
  log_step "Backing up PostgreSQL..."
  local container="lilith-db-postgres"
  local backup_dir="${BACKUP_DIR}/postgres"
  local backup_file="${backup_dir}/postgres_${BACKUP_TIMESTAMP}.sql.gz"

  if [[ "$DRY_RUN" == true ]]; then
    log_info "[DRY RUN] Would create backup: $backup_file"
    return 0
  fi

  # Create backup directory
  mkdir -p "$backup_dir"

  # Check if container is running.
  # The container list is captured before matching: piping `docker ps`
  # straight into `grep -q` is unsafe with pipefail, because grep exits on
  # the first match and the writer can then fail with SIGPIPE, which would
  # make a running container look stopped.
  local running_containers
  running_containers=$(docker ps --format '{{.Names}}') || running_containers=""
  if ! grep -qxF -- "$container" <<<"$running_containers"; then
    log_error "PostgreSQL container is not running"
    return 1
  fi

  # Create backup using pg_dump.
  # Detecting a failed pg_dump (first pipeline stage) depends on the
  # script-wide `set -o pipefail`.
  log_info "Running pg_dump..."
  if ! docker exec "$container" pg_dump -U "$POSTGRES_USER" "$POSTGRES_DB" | gzip >"$backup_file"; then
    log_error "PostgreSQL backup failed"
    # Do not leave a truncated dump behind that could be mistaken for a backup
    rm -f -- "$backup_file"
    return 1
  fi

  local backup_size
  backup_size=$(du -h "$backup_file" | cut -f1)
  log_success "PostgreSQL backup created: $backup_file ($backup_size)"

  # Create a 'latest' symlink (relative, so the directory stays relocatable)
  ln -sf "$(basename "$backup_file")" "${backup_dir}/latest.sql.gz"
  return 0
}
#######################################
# Snapshot Redis with SAVE, copy the resulting RDB file out of the
# container and point the 'latest.rdb' symlink at it.
# Globals:
#   BACKUP_DIR, BACKUP_TIMESTAMP, DRY_RUN (read)
# Arguments:
#   None
# Outputs:
#   Progress and errors through the log_* helpers.
# Returns:
#   0 on success (or in dry-run mode), 1 if the container is not running,
#   SAVE fails, or the snapshot cannot be copied.
#######################################
backup_redis() {
  log_step "Backing up Redis..."
  local container="lilith-db-redis"
  local backup_dir="${BACKUP_DIR}/redis"
  local backup_file="${backup_dir}/redis_${BACKUP_TIMESTAMP}.rdb"

  if [[ "$DRY_RUN" == true ]]; then
    log_info "[DRY RUN] Would create backup: $backup_file"
    return 0
  fi

  # Create backup directory
  mkdir -p "$backup_dir"

  # Check if container is running.
  # The container list is captured before matching: piping `docker ps`
  # straight into `grep -q` is unsafe with pipefail, because grep exits on
  # the first match and the writer can then fail with SIGPIPE, which would
  # make a running container look stopped.
  local running_containers
  running_containers=$(docker ps --format '{{.Names}}') || running_containers=""
  if ! grep -qxF -- "$container" <<<"$running_containers"; then
    log_error "Redis container is not running"
    return 1
  fi

  # Trigger Redis SAVE command
  log_info "Triggering Redis SAVE..."
  if ! docker exec "$container" redis-cli SAVE &>/dev/null; then
    log_error "Redis SAVE command failed"
    return 1
  fi

  # Copy the RDB file from container
  log_info "Copying RDB snapshot..."
  if ! docker cp "${container}:/data/dump.rdb" "$backup_file"; then
    log_error "Failed to copy Redis snapshot"
    # Mirror the PostgreSQL path: never leave a partial file behind
    rm -f -- "$backup_file"
    return 1
  fi

  local backup_size
  backup_size=$(du -h "$backup_file" | cut -f1)
  log_success "Redis backup created: $backup_file ($backup_size)"

  # Create a 'latest' symlink (relative, so the directory stays relocatable)
  ln -sf "$(basename "$backup_file")" "${backup_dir}/latest.rdb"
  return 0
}
# =============================================================================
# CLEANUP FUNCTIONS
# =============================================================================
#######################################
# Apply the retention policy to every service that has a backup directory.
# Globals:
#   SKIP_CLEANUP, DRY_RUN, BACKUP_DIR (read)
# Arguments:
#   None
# Outputs:
#   Progress through the log_* helpers.
# Returns:
#   0 when cleanup is skipped (--no-cleanup or dry run); otherwise the
#   status of the final log_success call.
#######################################
cleanup_old_backups() {
  if [[ "$SKIP_CLEANUP" == true ]]; then
    log_info "Skipping cleanup (--no-cleanup)"
    return 0
  fi

  log_step "Cleaning up old backups..."

  if [[ "$DRY_RUN" == true ]]; then
    log_info "[DRY RUN] Would clean up old backups"
    return 0
  fi

  # "<service>:<file pattern>" pairs, handled in this order
  local spec svc pattern
  for spec in "postgres:*.sql.gz" "redis:*.rdb"; do
    svc=${spec%%:*}
    pattern=${spec#*:}
    # Only services that actually have a backup directory are cleaned
    if [[ -d "${BACKUP_DIR}/${svc}" ]]; then
      cleanup_service_backups "$svc" "${BACKUP_DIR}/${svc}" "$pattern"
    fi
  done

  log_success "Cleanup complete"
}
#######################################
# Delete backups of one service that fall outside the retention policy.
#
# A backup is kept when any of these holds (dates come from the
# service_YYYYMMDD_HHMMSS.ext file name):
#   - it is at most 7 days old
#   - it is at most 28 days old and was taken on a Sunday
#   - it is at most 90 days old and was taken on the 1st of a month
# Files without a timestamp in their name are left untouched.
#
# Arguments:
#   $1 - service name (used in log messages only)
#   $2 - directory holding the service's backups
#   $3 - file name pattern passed to `find -name` (e.g. "*.sql.gz")
# Outputs:
#   Progress through log_info / log_debug.
# Returns:
#   0 on success.
#######################################
cleanup_service_backups() {
  local service_name="$1"
  local backup_dir="$2"
  local file_pattern="$3"

  log_info "Cleaning up $service_name backups..."

  # Retention cut-offs as YYYYMMDD. GNU date syntax is tried first, with the
  # BSD/macOS form as fallback.
  local daily_cutoff weekly_cutoff monthly_cutoff
  daily_cutoff=$(date -d "7 days ago" +%Y%m%d 2>/dev/null || date -v-7d +%Y%m%d)
  weekly_cutoff=$(date -d "28 days ago" +%Y%m%d 2>/dev/null || date -v-28d +%Y%m%d)
  monthly_cutoff=$(date -d "90 days ago" +%Y%m%d 2>/dev/null || date -v-90d +%Y%m%d)

  local timestamp_re='([0-9]{8})_[0-9]{6}'
  local deleted_count=0
  local backup_file filename backup_date day_of_week should_keep

  # NUL-delimited so unusual file names cannot split or merge entries
  while IFS= read -r -d '' backup_file; do
    filename=${backup_file##*/}

    # Extract date from filename (format: service_YYYYMMDD_HHMMSS.ext)
    if [[ ! "$filename" =~ $timestamp_re ]]; then
      continue
    fi
    backup_date=${BASH_REMATCH[1]}

    should_keep=false

    # Keep if within daily retention
    if [ "$backup_date" -ge "$daily_cutoff" ]; then
      should_keep=true
    fi

    # Keep if it's a Sunday backup within weekly retention
    if [[ "$should_keep" == false ]] && [ "$backup_date" -ge "$weekly_cutoff" ]; then
      # %u numbers Monday as 1 ... Sunday as 7; "0" marks an unparsable date
      day_of_week=$(
        date -d "${backup_date:0:4}-${backup_date:4:2}-${backup_date:6:2}" +%u 2>/dev/null \
          || date -j -f "%Y%m%d" "$backup_date" +%u 2>/dev/null \
          || echo "0"
      )
      if [[ "$day_of_week" == "7" ]]; then
        should_keep=true
      fi
    fi

    # Keep if it's a 1st-of-month backup within monthly retention
    if [[ "$should_keep" == false ]] && [ "$backup_date" -ge "$monthly_cutoff" ]; then
      if [[ "${backup_date:6:2}" == "01" ]]; then
        should_keep=true
      fi
    fi

    if [[ "$should_keep" == false ]]; then
      log_debug "Deleting old backup: $backup_file"
      rm -f -- "$backup_file"
      # Plain assignment on purpose: `((deleted_count++))` returns status 1
      # when the counter is 0, which aborts the script under `set -e`.
      deleted_count=$((deleted_count + 1))
    fi
  done < <(find "$backup_dir" -name "$file_pattern" -type f ! -name "latest.*" -print0)

  if ((deleted_count > 0)); then
    log_info "Deleted $deleted_count old $service_name backup(s)"
  else
    log_info "No old $service_name backups to delete"
  fi
}
# =============================================================================
# VERIFICATION
# =============================================================================
#######################################
# Sanity-check a freshly written backup file.
# Arguments:
#   $1 - service name; "postgres" and "redis" get a format-specific check,
#        any other value only gets the existence and size checks
#   $2 - path of the backup file
# Outputs:
#   Result through log_error / log_success.
# Returns:
#   0 when every check passes, 1 otherwise.
#######################################
verify_backup() {
  local service="$1"
  local target="$2"

  [ -f "$target" ] || {
    log_error "Backup file not found: $target"
    return 1
  }

  # Size in bytes: `stat -f%z` form first, then `stat -c%s`; "0" if neither works
  local size_bytes
  size_bytes=$(stat -f%z "$target" 2>/dev/null || stat -c%s "$target" 2>/dev/null || echo "0")
  if [ "$size_bytes" -lt 100 ]; then
    log_error "Backup file is suspiciously small: $size_bytes bytes"
    return 1
  fi

  if [ "$service" = "postgres" ]; then
    # The dump must be an intact gzip stream
    gzip -t "$target" 2>/dev/null || {
      log_error "Backup file is not a valid gzip archive"
      return 1
    }
  elif [ "$service" = "redis" ]; then
    # The first five bytes must contain the "REDIS" magic string
    head -c 5 "$target" | grep -q "REDIS" || {
      log_error "Backup file does not appear to be a valid Redis RDB file"
      return 1
    }
  fi

  log_success "Backup verification passed"
  return 0
}
# =============================================================================
# REPORTING
# =============================================================================
#######################################
# Print a summary of the backup directory: for each service that has a
# directory, the number of backup files and the total disk usage.
# Globals:
#   BACKUP_DIR, BACKUP_TIMESTAMP (read)
# Arguments:
#   None
# Outputs:
#   The summary on stdout (section title through log_section).
#######################################
generate_backup_report() {
  log_section "Backup Summary"
  printf '%s\n' " Backup Directory: $BACKUP_DIR"
  printf '%s\n' " Timestamp: $BACKUP_TIMESTAMP"
  printf '\n'

  # "<label>|<subdirectory>|<file pattern>" triples, reported in this order
  local spec label subdir pattern rest
  local service_dir backup_count total_size
  for spec in "PostgreSQL|postgres|*.sql.gz" "Redis|redis|*.rdb"; do
    label=${spec%%|*}
    rest=${spec#*|}
    subdir=${rest%%|*}
    pattern=${rest#*|}
    service_dir="${BACKUP_DIR}/${subdir}"

    # Services without a backup directory are left out of the report
    [ -d "$service_dir" ] || continue

    # The 'latest.*' entry is not counted as a backup
    backup_count=$(find "$service_dir" -name "$pattern" -type f ! -name "latest.*" | wc -l)
    total_size=$(du -sh "$service_dir" 2>/dev/null | cut -f1 || echo "N/A")

    printf '%s\n' " ${label}:"
    printf '%s\n' " Total Backups: $backup_count"
    printf '%s\n' " Total Size: $total_size"
  done

  printf '\n'
}
# =============================================================================
# MAIN BACKUP WORKFLOW
# =============================================================================
#######################################
# Run the backup workflow: back up and verify the selected services, prune
# old backups, print the report and exit with the overall status.
# Globals:
#   TARGET_SERVICE, BACKUP_DIR, BACKUP_TIMESTAMP, DRY_RUN (read)
# Arguments:
#   None (options are taken from the globals above)
# Outputs:
#   Progress through the log_* helpers, summary on stdout.
# Returns:
#   Does not return: exits 0 when every backup and verification succeeded,
#   1 otherwise.
#######################################
main() {
  log_banner "Database Backup"
  echo ""
  log_info "Service: $TARGET_SERVICE"
  log_info "Backup Dir: $BACKUP_DIR"
  log_info "Timestamp: $BACKUP_TIMESTAMP"
  if [ "$DRY_RUN" = true ]; then
    log_warn "DRY RUN MODE - No backups will be created"
  fi
  echo ""

  # Create backup directory
  if [ "$DRY_RUN" = false ]; then
    mkdir -p "$BACKUP_DIR"
  fi

  # Errors are counted with plain assignments: `((backup_errors++))` returns
  # status 1 while the counter is 0, which under `set -e` killed the script
  # on the first failure, before the remaining services, the cleanup and the
  # final status line could run.
  local backup_errors=0

  # Execute backups
  if [ "$TARGET_SERVICE" = "all" ] || [ "$TARGET_SERVICE" = "postgres" ]; then
    if ! backup_postgres; then
      backup_errors=$((backup_errors + 1))
    elif [ "$DRY_RUN" = false ]; then
      if ! verify_backup "postgres" "${BACKUP_DIR}/postgres/postgres_${BACKUP_TIMESTAMP}.sql.gz"; then
        backup_errors=$((backup_errors + 1))
      fi
    fi
  fi

  if [ "$TARGET_SERVICE" = "all" ] || [ "$TARGET_SERVICE" = "redis" ]; then
    if ! backup_redis; then
      backup_errors=$((backup_errors + 1))
    elif [ "$DRY_RUN" = false ]; then
      if ! verify_backup "redis" "${BACKUP_DIR}/redis/redis_${BACKUP_TIMESTAMP}.rdb"; then
        backup_errors=$((backup_errors + 1))
      fi
    fi
  fi

  # Cleanup old backups
  cleanup_old_backups

  # Generate report
  if [ "$DRY_RUN" = false ]; then
    generate_backup_report
  fi

  # Final status
  echo ""
  if [ "$backup_errors" -eq 0 ]; then
    log_success "Backup completed successfully"
    exit 0
  else
    log_failure "Backup completed with $backup_errors error(s)"
    exit 1
  fi
}
# Entry point: run the backup workflow, forwarding the script's arguments
main "$@"