6de15810f4
- Boot check with safe-point detection for unclean restarts - Split system (daily) and content (2h) backup schedules - 4-phase auto-heal: filesystem repair, config restore, service restart, health verify - Self-installing: creates cron jobs, systemd shutdown hook, and config - Configurable via /etc/moko/autoheal.conf Authored-by: Moko Consulting Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
679 lines
24 KiB
Bash
679 lines
24 KiB
Bash
#!/usr/bin/env bash
|
|
# server-autoheal.sh - Auto-heal on restart + split backup management
#
# Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
# SPDX-License-Identifier: GPL-3.0-or-later
#
# DEFGROUP: MokoStandards.Automation.ServerAutoheal
# INGROUP: MokoStandards.Automation
# REPO: https://git.mokoconsulting.tech/MokoConsulting/moko-platform
# PATH: /automation/server-autoheal.sh
# BRIEF: Server auto-heal on unclean restart + split system/content backups
#
# Usage:
#   server-autoheal.sh <command> [options]
#
# Commands:
#   boot-check       Run at boot — auto-heals if no safe point exists
#   set-safepoint    Mark current state as safe (call before planned shutdown)
#   clear-safepoint  Remove the safe-point marker
#   backup-system    Run a system backup (configs, packages, services)
#   backup-content   Run a content backup (site files, databases, uploads)
#   cleanup          Prune expired backups per retention policy
#   status           Show safe point and backup status
#   install          Install the cron jobs, systemd shutdown hook and example config
#
# Scheduling (cron):
#   @reboot        server-autoheal.sh boot-check
#   0 3 * * *      server-autoheal.sh backup-system    (daily at 3am)
#   0 */2 * * *    server-autoheal.sh backup-content   (every 2 hours)
#   30 */2 * * *   server-autoheal.sh cleanup          (30 min after content backup)
|
|
|
|
set -euo pipefail

# ──────────────────────────────────────────────
# Configuration — override via /etc/moko/autoheal.conf
# ──────────────────────────────────────────────
CONF_FILE="/etc/moko/autoheal.conf"
# The config file is plain shell and is sourced, so it can pre-set any of the
# variables below; every assignment uses ${VAR:-default} to respect that.
if [[ -f "$CONF_FILE" ]]; then
  # shellcheck source=/dev/null
  source "$CONF_FILE"
fi

BACKUP_ROOT="${BACKUP_ROOT:-/var/backups/moko}"
# The safe point is written at shutdown and read at the NEXT boot, so it must
# live on persistent storage. /var/run is a tmpfs on systemd hosts and is
# wiped on every boot, which would make every restart look unclean. The
# default therefore sits under BACKUP_ROOT, whose tree ensure_dirs creates.
SAFEPOINT_FILE="${SAFEPOINT_FILE:-${BACKUP_ROOT}/safepoint}"
LOG_FILE="${LOG_FILE:-/var/log/moko/autoheal.log}"
# Lock files are per-boot state, so a tmpfs location is the right place.
LOCK_DIR="${LOCK_DIR:-/var/run/moko}"

# System backup: configs, package lists, service state, cron
SYSTEM_BACKUP_DIR="${BACKUP_ROOT}/system"
SYSTEM_BACKUP_RETAIN="${SYSTEM_BACKUP_RETAIN:-7}" # keep 7 daily system backups

# Content backup: web roots, databases, uploads
CONTENT_BACKUP_DIR="${BACKUP_ROOT}/content"
CONTENT_BACKUP_RETAIN_HOURS="${CONTENT_BACKUP_RETAIN_HOURS:-24}" # 1 day of content backups

# Paths to back up — override these in /etc/moko/autoheal.conf
SYSTEM_PATHS="${SYSTEM_PATHS:-/etc/nginx /etc/php /etc/mysql /etc/cron.d /etc/systemd/system}"
CONTENT_PATHS="${CONTENT_PATHS:-/var/www}"
DB_NAMES="${DB_NAMES:-}" # space-separated list, empty = auto-detect all
|
|
|
|
# ──────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────

#######################################
# Emit a timestamped message on stderr and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - severity label (INFO, WARN, ERROR, ...)
#            $2.. - message words, joined with spaces
# Outputs:   "[<UTC ISO-8601>] [<level>] <message>" on stderr and in LOG_FILE
# Returns:   0 always. Logging is best-effort: an unwritable log file must
#            never abort a heal or backup run under `set -e`.
#######################################
log() {
  local level="$1"
  shift
  local ts
  ts=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
  local msg="[$ts] [$level] $*"

  printf '%s\n' "$msg" >&2
  # The braces make the redirection error itself (missing directory, no
  # permission) go to /dev/null instead of killing the caller.
  { printf '%s\n' "$msg" >> "$LOG_FILE"; } 2>/dev/null || true
}
|
|
|
|
#######################################
# Create every directory the script writes into.
# Globals:   SYSTEM_BACKUP_DIR, CONTENT_BACKUP_DIR, LOCK_DIR, LOG_FILE,
#            SAFEPOINT_FILE (all read)
# Returns:   status of mkdir
#######################################
ensure_dirs() {
  # The safe-point directory is included explicitly: SAFEPOINT_FILE is
  # configurable and need not live inside LOCK_DIR.
  mkdir -p -- "$SYSTEM_BACKUP_DIR" "$CONTENT_BACKUP_DIR" \
    "$LOCK_DIR" "$(dirname -- "$LOG_FILE")" "$(dirname -- "$SAFEPOINT_FILE")"
}
|
|
|
|
#######################################
# Take a per-operation PID lock, or exit quietly if one is already held.
# Globals:   LOCK_DIR (read)
# Arguments: $1 - operation name; the lock is ${LOCK_DIR}/autoheal-<name>.lock
# Side effects:
#   - Installs an EXIT trap that removes the lock file.
#   - Calls `exit 0` (not return) when a live process holds the lock, so a
#     second cron run of the same operation is skipped.
# Returns:   0 when the lock was taken, 1 when the file cannot be created.
#######################################
acquire_lock() {
  local name="$1"
  local lockfile="${LOCK_DIR}/autoheal-${name}.lock"
  local holder attempt

  for attempt in 1 2; do
    # With noclobber, `>` opens the file with O_EXCL, so create-if-absent is
    # atomic: two racing runs cannot both believe they own the lock. The
    # subshell keeps noclobber from leaking into the rest of the script.
    if ( set -o noclobber; printf '%s\n' "$$" > "$lockfile" ) 2>/dev/null; then
      # Expand the path now (it is a local) and shell-quote it for the trap.
      # shellcheck disable=SC2064
      trap "rm -f -- $(printf '%q' "$lockfile")" EXIT
      return 0
    fi

    holder=$(cat -- "$lockfile" 2>/dev/null) || holder=""
    # Only a numeric PID is handed to kill; garbage means a stale lock.
    if [[ "$holder" =~ ^[0-9]+$ ]] && kill -0 "$holder" 2>/dev/null; then
      log WARN "Another $name operation is running (PID $holder), skipping"
      exit 0
    fi

    # Stale lock from a dead process: remove it and retry once.
    rm -f -- "$lockfile"
  done

  log ERROR "Could not create lock file: $lockfile"
  return 1
}
|
|
|
|
#######################################
# Print the current UTC time as YYYYMMDD_HHMMSS.
# The fixed-width, digits-only form sorts chronologically as a plain string,
# which is what backup file names are built from.
# Outputs:   the timestamp on stdout
#######################################
timestamp() {
  date -u '+%Y%m%d_%H%M%S'
}
|
|
|
|
# ──────────────────────────────────────────────
# Safe-point management
# ──────────────────────────────────────────────

#######################################
# Record a safe point: a marker meaning "the system is being shut down
# cleanly". boot-check reads it on the next start.
# Globals:   SAFEPOINT_FILE (read), SUDO_USER (read, optional)
# Outputs:   writes key=value lines (timestamp, hostname, kernel, uptime,
#            set_by) to SAFEPOINT_FILE
# Returns:   0 on success, non-zero if the marker cannot be written
#######################################
cmd_set_safepoint() {
  ensure_dirs

  local ts who tmp
  ts=$(timestamp)
  who="${SUDO_USER:-$(whoami 2>/dev/null || id -u)}"

  # SAFEPOINT_FILE is configurable, so make sure its directory exists.
  mkdir -p -- "$(dirname -- "$SAFEPOINT_FILE")"

  # Write to a sibling temp file and rename: the marker is written while the
  # machine is going down, and a half-written file must never be mistaken
  # for a valid safe point.
  tmp=$(mktemp "${SAFEPOINT_FILE}.XXXXXX")
  if ! cat > "$tmp" <<EOF
timestamp=$ts
hostname=$(hostname 2>/dev/null || uname -n)
kernel=$(uname -r)
uptime=$(uptime -s 2>/dev/null || echo "unknown")
set_by=$who
EOF
  then
    rm -f -- "$tmp"
    return 1
  fi
  # mktemp creates the file 0600; keep the marker readable for `status`.
  chmod 644 "$tmp"
  mv -f -- "$tmp" "$SAFEPOINT_FILE"

  log INFO "Safe point set at $ts by $who"
}
|
|
|
|
#######################################
# Remove the safe-point marker so the next boot is treated as unclean
# unless a new safe point is set first.
# Globals:   SAFEPOINT_FILE (read)
# Returns:   0 (a missing marker is not an error)
#######################################
cmd_clear_safepoint() {
  rm -f -- "$SAFEPOINT_FILE"
  log INFO "Safe point cleared"
}
|
|
|
|
#######################################
# Test whether a safe-point marker exists.
# Globals:   SAFEPOINT_FILE (read)
# Returns:   0 if SAFEPOINT_FILE is a regular file, 1 otherwise
#######################################
has_safepoint() {
  [[ -f "$SAFEPOINT_FILE" ]]
}
|
|
|
|
# ──────────────────────────────────────────────
# System backup (daily)
# ──────────────────────────────────────────────

#######################################
# Archive the configured system paths and record a manifest of installed
# packages, enabled services and crontabs, then prune old system backups.
# Globals:   SYSTEM_BACKUP_DIR, SYSTEM_PATHS, SYSTEM_BACKUP_RETAIN (read)
# Outputs:   system_<ts>.tar.gz and system_<ts>.manifest in SYSTEM_BACKUP_DIR
# Returns:   0 on success; 1 if no configured path exists or tar fails
#######################################
cmd_backup_system() {
  ensure_dirs
  acquire_lock "system-backup"

  local ts
  ts=$(timestamp)
  local archive="${SYSTEM_BACKUP_DIR}/system_${ts}.tar.gz"
  local manifest="${SYSTEM_BACKUP_DIR}/system_${ts}.manifest"
  # Dot-prefixed so a half-written archive never matches the system_* globs
  # used by pruning, status and restore.
  local partial="${SYSTEM_BACKUP_DIR}/.system_${ts}.tar.gz.partial"

  log INFO "Starting system backup → $archive"

  # We hold the lock, so any leftover partial file is from a crashed run.
  rm -f -- "$SYSTEM_BACKUP_DIR"/.system_*.partial

  # Collect existing paths only. SYSTEM_PATHS is a space-separated list, so
  # word splitting (and globbing of any pattern in it) is intentional.
  local -a existing_paths=()
  local p
  # shellcheck disable=SC2086
  for p in $SYSTEM_PATHS; do
    if [[ -e "$p" ]]; then
      existing_paths+=("$p")
    fi
  done

  if (( ${#existing_paths[@]} == 0 )); then
    log WARN "No system paths found to back up"
    return 1
  fi

  # Archive into the partial file and publish it only if tar succeeded.
  # stderr is dropped (tar always warns about stripping the leading '/');
  # the exit status is what is checked.
  local tar_rc=0
  tar -czf "$partial" "${existing_paths[@]}" 2>/dev/null || tar_rc=$?
  # GNU tar: 1 = a file changed while being read (archive usable),
  # 2 or more = fatal error.
  if (( tar_rc > 1 )) || [[ ! -s "$partial" ]]; then
    rm -f -- "$partial"
    log ERROR "System backup failed (tar exit $tar_rc) — no archive written"
    return 1
  fi
  if (( tar_rc == 1 )); then
    log WARN "Some files changed while being archived"
  fi
  mv -f -- "$partial" "$archive"

  # Capture package list and service state as manifest
  {
    echo "=== PACKAGES ==="
    if command -v dpkg &>/dev/null; then
      dpkg --get-selections
    elif command -v rpm &>/dev/null; then
      rpm -qa --qf '%{NAME}\t%{VERSION}\n'
    fi
    echo ""
    echo "=== ENABLED SERVICES ==="
    if command -v systemctl &>/dev/null; then
      systemctl list-unit-files --state=enabled --no-pager 2>/dev/null || true
    fi
    echo ""
    echo "=== CRONTABS ==="
    local crontab_file
    for crontab_file in /var/spool/cron/crontabs/*; do
      if [[ -f "$crontab_file" ]]; then
        echo "--- $(basename "$crontab_file") ---"
        # Unreadable crontabs are skipped; the manifest is informational.
        cat "$crontab_file" 2>/dev/null || true
      fi
    done
  } > "$manifest"

  local size
  size=$(du -sh "$archive" 2>/dev/null | cut -f1) || size="unknown"
  log INFO "System backup complete: $archive ($size)"

  # Prune old system backups (keep $SYSTEM_BACKUP_RETAIN). The glob expands
  # in sorted order and the names embed a fixed-width UTC timestamp, so the
  # array is oldest-first without parsing find output or trusting mtimes.
  local -a archives=()
  local f
  for f in "$SYSTEM_BACKUP_DIR"/system_*.tar.gz; do
    if [[ -f "$f" ]]; then
      archives+=("$f")
    fi
  done

  local excess=$(( ${#archives[@]} - SYSTEM_BACKUP_RETAIN ))
  local i
  for (( i = 0; i < excess; i++ )); do
    rm -f -- "${archives[i]}" "${archives[i]%.tar.gz}.manifest"
    log INFO "Pruned old system backup: ${archives[i]}"
  done

  return 0
}
|
|
|
|
# ──────────────────────────────────────────────
# Content backup (every 2 hours)
# ──────────────────────────────────────────────

#######################################
# Archive the configured content paths and dump the databases.
# Globals:   CONTENT_BACKUP_DIR, CONTENT_PATHS, DB_NAMES (read)
# Outputs:   content_<ts>.tar.gz and, when a dump tool and at least one
#            database are available, content_<ts>.sql.gz
# Returns:   0 on success; 1 if the file archive or the database dump failed
#            (the other half is still attempted)
#######################################
cmd_backup_content() {
  ensure_dirs
  acquire_lock "content-backup"

  local ts
  ts=$(timestamp)
  local archive="${CONTENT_BACKUP_DIR}/content_${ts}.tar.gz"
  local db_dump="${CONTENT_BACKUP_DIR}/content_${ts}.sql.gz"
  local failed=0

  log INFO "Starting content backup → $archive"

  # We hold the lock, so any leftover partial file is from a crashed run.
  rm -f -- "$CONTENT_BACKUP_DIR"/.content_*.partial

  # Back up web content / uploads. CONTENT_PATHS is a space-separated list,
  # so word splitting (and globbing) is intentional.
  local -a existing_paths=()
  local p
  # shellcheck disable=SC2086
  for p in $CONTENT_PATHS; do
    if [[ -e "$p" ]]; then
      existing_paths+=("$p")
    fi
  done

  if (( ${#existing_paths[@]} > 0 )); then
    # Dot-prefixed partial file: restore and cleanup only match content_*,
    # so they never see a half-written archive.
    local partial="${CONTENT_BACKUP_DIR}/.content_${ts}.tar.gz.partial"
    local tar_rc=0
    tar -czf "$partial" "${existing_paths[@]}" 2>/dev/null || tar_rc=$?
    # GNU tar: 1 = a file changed while being read (normal on a live site),
    # 2 or more = fatal error.
    if (( tar_rc > 1 )) || [[ ! -s "$partial" ]]; then
      rm -f -- "$partial"
      log ERROR "Content archive failed (tar exit $tar_rc) — no archive written"
      failed=1
    else
      if (( tar_rc == 1 )); then
        log WARN "Some files changed while being archived"
      fi
      mv -f -- "$partial" "$archive"
      local size
      size=$(du -sh "$archive" 2>/dev/null | cut -f1) || size="unknown"
      log INFO "Content files archived: $archive ($size)"
    fi
  else
    log WARN "No content paths found to back up"
  fi

  # Database dump
  local dump_cmd=""
  if command -v mariadb-dump &>/dev/null; then
    dump_cmd="mariadb-dump"
  elif command -v mysqldump &>/dev/null; then
    dump_cmd="mysqldump"
  fi

  if [[ -n "$dump_cmd" ]]; then
    # Pick the SQL client by name. Deriving it by stripping "dump" from the
    # dump tool yields "mariadb-" for mariadb-dump, which is not a command.
    local client_cmd="mysql"
    if [[ "$dump_cmd" == "mariadb-dump" ]] && command -v mariadb &>/dev/null; then
      client_cmd="mariadb"
    fi

    local -a databases=()
    if [[ -n "$DB_NAMES" ]]; then
      read -ra databases <<< "$DB_NAMES"
    else
      # Auto-detect: dump all databases except system ones
      local listing=""
      listing=$("$client_cmd" -N -e \
        "SELECT schema_name FROM information_schema.schemata
         WHERE schema_name NOT IN ('information_schema','performance_schema','mysql','sys')" \
        2>/dev/null) || listing=""
      local name
      while IFS= read -r name; do
        if [[ -n "$name" ]]; then
          databases+=("$name")
        fi
      done <<< "$listing"
    fi

    if (( ${#databases[@]} > 0 )); then
      local db_partial="${CONTENT_BACKUP_DIR}/.content_${ts}.sql.gz.partial"
      # pipefail is set inside the subshell so a failing dump is detected
      # even though gzip, the last stage, succeeds.
      if ( set -o pipefail
           "$dump_cmd" --single-transaction --routines --triggers \
             --databases "${databases[@]}" 2>/dev/null \
             | gzip > "$db_partial" ); then
        mv -f -- "$db_partial" "$db_dump"
        local db_size
        db_size=$(du -sh "$db_dump" 2>/dev/null | cut -f1) || db_size="unknown"
        log INFO "Database dump complete: $db_dump ($db_size)"
      else
        # Never leave a truncated dump that a later restore could pick up.
        rm -f -- "$db_partial"
        log ERROR "Database dump failed — no dump written"
        failed=1
      fi
    else
      log WARN "No databases found to dump"
    fi
  fi

  return "$failed"
}
|
|
|
|
# ──────────────────────────────────────────────
# Cleanup — prune content backups older than retention
# ──────────────────────────────────────────────

#######################################
# Apply the retention policy to both backup sets.
#   - Content: delete content_* files older than CONTENT_BACKUP_RETAIN_HOURS.
#   - System:  keep the SYSTEM_BACKUP_RETAIN newest archives; each archive is
#              removed together with its manifest.
# Globals:   CONTENT_BACKUP_DIR, CONTENT_BACKUP_RETAIN_HOURS,
#            SYSTEM_BACKUP_DIR, SYSTEM_BACKUP_RETAIN (read)
# Returns:   0
#######################################
cmd_cleanup() {
  ensure_dirs
  local before_count after_count

  # Content: keep only last 24 hours (1 day)
  before_count=$(find "$CONTENT_BACKUP_DIR" -name 'content_*' -type f | wc -l)
  find "$CONTENT_BACKUP_DIR" -name 'content_*' -type f \
    -mmin +$((CONTENT_BACKUP_RETAIN_HOURS * 60)) -delete 2>/dev/null || true
  after_count=$(find "$CONTENT_BACKUP_DIR" -name 'content_*' -type f | wc -l)
  local removed=$((before_count - after_count))
  if (( removed > 0 )); then
    log INFO "Pruned $removed content backup(s) older than ${CONTENT_BACKUP_RETAIN_HOURS}h"
  fi

  # System: keep N most recent (handled in backup-system, but double-check
  # here). Pruning works per archive, not per file: an archive and its
  # manifest have different mtimes, so deleting "the N oldest files" can
  # remove one half of a pair and keep the other. The glob expands in sorted
  # order and names embed a fixed-width UTC timestamp, so the array is
  # oldest-first.
  local -a archives=()
  local f
  for f in "$SYSTEM_BACKUP_DIR"/system_*.tar.gz; do
    if [[ -f "$f" ]]; then
      archives+=("$f")
    fi
  done

  local excess=$(( ${#archives[@]} - SYSTEM_BACKUP_RETAIN ))
  if (( excess > 0 )); then
    local i
    for (( i = 0; i < excess; i++ )); do
      rm -f -- "${archives[i]}" "${archives[i]%.tar.gz}.manifest"
    done
    log INFO "Pruned excess system backups"
  fi

  log INFO "Cleanup complete"
}
|
|
|
|
# ──────────────────────────────────────────────
# Boot check — the auto-heal entry point
# ──────────────────────────────────────────────

#######################################
# Decide at boot whether the previous shutdown was clean.
#   - Safe point present: previous shutdown was clean; clear the marker so
#     the next boot is judged afresh.
#   - Safe point absent: run auto_heal.
# Returns:   0 on a clean restart or a successful heal; auto_heal's non-zero
#            status when healing failed
#######################################
cmd_boot_check() {
  ensure_dirs
  acquire_lock "boot-check"

  log INFO "=== Boot check started ==="
  log INFO "Hostname: $(hostname 2>/dev/null || uname -n), Kernel: $(uname -r)"

  if has_safepoint; then
    log INFO "Safe point found — server was shut down cleanly"
    log INFO "Clearing safe point for next cycle"
    cmd_clear_safepoint
    log INFO "=== Boot check passed (clean restart) ==="
    return 0
  fi

  log WARN "NO safe point found — server restarted without clean shutdown"
  log WARN "Initiating auto-heal sequence..."

  # Capture the status with `||`: under `set -e` a bare failing call would
  # terminate the script here and the failure branch below would never run.
  local rc=0
  auto_heal || rc=$?

  # No safe point is written after a heal. The marker means "the last
  # shutdown was clean"; creating one now would make a crash that follows
  # this boot look like a clean restart and skip the heal it needs.
  if (( rc == 0 )); then
    log INFO "=== Boot check complete (healed successfully) ==="
  else
    log ERROR "=== Boot check FAILED — manual intervention required ==="
  fi

  return "$rc"
}
|
|
|
|
# ──────────────────────────────────────────────
# Auto-heal strategy
#
# Hybrid recovery: repair and restart first, and fall back to a restore
# from backup only if the health check still fails.
#
# Trade-offs behind that choice:
# - Restore-from-backup: safest, but content may be up to 2h stale
# - Service-restart-only: faster, keeps current data, but won't fix
#   corrupted configs or broken filesystem state
# - Hybrid: restart services first, verify health, only restore if
#   health checks fail — best of both worlds but more complex
#
# The function receives no arguments. It uses the latest system + content
# backups to restore if needed. Returns 0 on success, 1 on failure.
# ──────────────────────────────────────────────
auto_heal() {
  # Every phase is best-effort: a failing phase is logged and the sequence
  # continues, because verify_health is what decides the outcome. The
  # explicit `||` also keeps a phase failure from terminating the script
  # under `set -e` before the health check has run.
  log INFO "Phase 1: Verify and repair filesystem"
  # Check for common post-crash issues
  repair_filesystem || log WARN "Filesystem repair reported errors — continuing"

  log INFO "Phase 2: Restore system configuration if corrupted"
  restore_system_if_needed || log WARN "System configuration restore failed — continuing"

  log INFO "Phase 3: Restart core services"
  restart_services || log WARN "Service restart reported errors — continuing"

  log INFO "Phase 4: Verify health"
  if ! verify_health; then
    log WARN "Health check failed after service restart — restoring from backup"
    restore_from_backup || log WARN "Restore from backup reported errors — continuing"
    restart_services || log WARN "Service restart reported errors — continuing"

    if ! verify_health; then
      log ERROR "Health check still failing after restore — giving up"
      return 1
    fi
  fi

  log INFO "Auto-heal completed successfully"
  return 0
}
|
|
|
|
# ──────────────────────────────────────────────
# Heal sub-steps
# ──────────────────────────────────────────────

#######################################
# Fix common post-crash filesystem issues: remove PID files whose process
# is gone and reset permissions on a few well-known directories.
# Globals:   STALE_PID_FILES (read, optional) - space-separated list of PID
#            file paths or glob patterns; defaults to the nginx, mysqld and
#            php-fpm PID files
# Returns:   0 always (every step is best-effort)
#######################################
repair_filesystem() {
  # Clear stale PID/lock/socket files that prevent services from starting
  local default_pid_files="/var/run/nginx.pid /var/run/mysqld/mysqld.pid"
  default_pid_files+=" /var/run/php-fpm.pid /var/lib/mysql/*.pid"

  local -a patterns=()
  read -ra patterns <<< "${STALE_PID_FILES:-$default_pid_files}"

  local pattern pidfile pid
  for pattern in "${patterns[@]}"; do
    # compgen -G expands the pattern without word-splitting the results and
    # prints nothing when there is no match.
    while IFS= read -r pidfile; do
      if [[ ! -f "$pidfile" ]]; then
        continue
      fi
      pid=$(tr -d '[:space:]' < "$pidfile" 2>/dev/null) || pid=""
      # An empty PID file is left alone.
      if [[ -z "$pid" ]]; then
        continue
      fi
      # Only a plain number is passed to kill: a value such as "-1" would
      # address a process group rather than a single process.
      if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
        continue
      fi
      rm -f -- "$pidfile"
      log INFO "Removed stale PID file: $pidfile"
    done < <(compgen -G "$pattern" || true)
  done

  # Fix permissions on critical dirs that may get mangled
  if [[ -d /var/run/mysqld ]]; then
    chown mysql:mysql /var/run/mysqld 2>/dev/null || true
  fi
  if [[ -d /var/lib/php/sessions ]]; then
    chmod 1733 /var/lib/php/sessions 2>/dev/null || true
  fi

  # Repair tmp/cache dirs
  local d
  for d in /tmp /var/tmp; do
    if [[ -d "$d" ]]; then
      chmod 1777 "$d" 2>/dev/null || true
    fi
  done

  return 0
}
|
|
|
|
#######################################
# Restore the system configuration from the newest system backup, but only
# when a critical config looks damaged.
# A critical config counts as damaged when it is an empty file, or when it
# is missing on disk although the backup contains it. A path that is absent
# from both is treated as "not installed" and ignored.
# Globals:   SYSTEM_BACKUP_DIR (read)
# Arguments: $1 - (optional) directory to check and extract into;
#                 defaults to /
# Returns:   0 if nothing needed restoring, no backup exists, or the restore
#            succeeded; 1 if extraction failed
#######################################
restore_system_if_needed() {
  local root="${1:-/}"

  # Find latest system backup. The glob expands in sorted order and names
  # embed a fixed-width UTC timestamp, so the last match is the newest.
  local -a archives=()
  local f
  for f in "$SYSTEM_BACKUP_DIR"/system_*.tar.gz; do
    if [[ -f "$f" ]]; then
      archives+=("$f")
    fi
  done

  if (( ${#archives[@]} == 0 )); then
    log WARN "No system backup available to verify against"
    return 0
  fi
  local latest_system="${archives[${#archives[@]}-1]}"

  # Member list of the archive, captured once. An unreadable archive gives
  # an empty list, so "missing" is then never reported.
  local members
  members=$(tar -tzf "$latest_system" 2>/dev/null) || members=""

  # Check if critical configs exist and are non-empty
  local needs_restore=false
  local -a critical_configs=("/etc/nginx/nginx.conf" "/etc/php" "/etc/mysql")
  local cfg target rel

  for cfg in "${critical_configs[@]}"; do
    target="${root%/}${cfg}"
    rel="${cfg#/}" # tar stores members without the leading slash
    if [[ -f "$target" && ! -s "$target" ]]; then
      log WARN "Critical config is empty: $cfg"
      needs_restore=true
      break
    fi
    # Directories are listed by tar with a trailing slash.
    if [[ ! -e "$target" ]] && grep -qxF -e "$rel" -e "$rel/" <<< "$members"; then
      log WARN "Critical config is missing: $cfg"
      needs_restore=true
      break
    fi
  done

  if [[ "$needs_restore" == true ]]; then
    log WARN "Restoring system config from $latest_system"
    if ! tar -xzf "$latest_system" -C "$root" 2>/dev/null; then
      log ERROR "System restore failed from $latest_system"
      return 1
    fi
    log INFO "System config restored"
  else
    log INFO "System configs look intact — skipping restore"
  fi

  return 0
}
|
|
|
|
#######################################
# Restart every known database, web-server and PHP-FPM unit that is enabled.
# Units that are not enabled (or not installed) are skipped silently.
# Returns:   0 always; a failed restart is logged as a warning and left for
#            the health check to judge
#######################################
restart_services() {
  if ! command -v systemctl &>/dev/null; then
    log WARN "systemctl not available — skipping service restart"
    return 0
  fi

  local -a services=("mysql" "mariadb" "nginx" "apache2" "php-fpm" "php8.1-fpm" "php8.2-fpm" "php8.3-fpm")
  local svc

  for svc in "${services[@]}"; do
    if ! systemctl is-enabled "$svc" &>/dev/null; then
      continue
    fi
    log INFO "Restarting $svc..."
    # if/else rather than `a && b || c`, so a failing log call can never be
    # misreported as a failed restart.
    if systemctl restart "$svc" 2>/dev/null; then
      log INFO "$svc restarted OK"
    else
      log WARN "$svc restart failed"
    fi
  done

  return 0
}
|
|
|
|
#######################################
# Check that the stack is up: enabled database/web units are active, the
# local web server answers over HTTP and the database answers a ping.
# Checks whose tool (curl, mysqladmin) is not installed are skipped.
# Returns:   0 if every check that ran passed, 1 otherwise
#######################################
verify_health() {
  local failures=0
  local svc

  # Check critical services are running
  local -a services=("mysql" "mariadb" "nginx" "apache2")
  for svc in "${services[@]}"; do
    if systemctl is-enabled "$svc" &>/dev/null \
      && ! systemctl is-active "$svc" &>/dev/null; then
      log WARN "Service not running: $svc"
      # Plain assignment: `((failures++))` evaluates to 0 on the first
      # failure, returns status 1 and would abort the script under `set -e`.
      failures=$((failures + 1))
    fi
  done

  # Check if web server responds
  if command -v curl &>/dev/null; then
    if ! curl -sf -o /dev/null --max-time 10 "http://localhost/" 2>/dev/null; then
      log WARN "Local web server not responding"
      failures=$((failures + 1))
    fi
  fi

  # Check if database accepts connections
  if command -v mysqladmin &>/dev/null; then
    if ! mysqladmin ping --silent 2>/dev/null; then
      log WARN "Database not responding to ping"
      failures=$((failures + 1))
    fi
  fi

  [[ $failures -eq 0 ]]
}
|
|
|
|
#######################################
# Print the newest backup in a directory that passes a gzip integrity test.
# A crash in the middle of a backup run can leave a truncated file as the
# newest one; such files are skipped in favour of the next older backup.
# Arguments: $1 - directory to search
#            $2 - file-name glob (names must sort chronologically)
# Outputs:   path of the chosen backup on stdout
# Returns:   0 if a valid backup was found, 1 otherwise
#######################################
_latest_valid_backup() {
  local dir="$1" pattern="$2"
  local -a found=()
  local f i

  # $pattern is left unquoted on purpose so that it globs.
  for f in "$dir"/$pattern; do
    if [[ -f "$f" ]]; then
      found+=("$f")
    fi
  done

  # Glob results are sorted, so walk from the end for newest-first.
  for (( i = ${#found[@]} - 1; i >= 0; i-- )); do
    if gzip -t -- "${found[i]}" 2>/dev/null; then
      printf '%s\n' "${found[i]}"
      return 0
    fi
    log WARN "Skipping corrupt backup: ${found[i]}"
  done

  return 1
}

#######################################
# Full restore: system config, content files and databases, each from its
# newest intact backup. Sets that have no backup are skipped.
# Globals:   SYSTEM_BACKUP_DIR, CONTENT_BACKUP_DIR (read)
# Arguments: $1 - (optional) directory to extract archives into;
#                 defaults to /
# Returns:   0 always. Individual failures are logged as errors; the caller
#            judges the outcome with a health check afterwards.
#######################################
restore_from_backup() {
  local root="${1:-/}"
  local latest_system latest_content latest_db

  log WARN "=== Full restore from backup ==="

  # Restore system config
  if latest_system=$(_latest_valid_backup "$SYSTEM_BACKUP_DIR" 'system_*.tar.gz'); then
    log INFO "Restoring system from $latest_system"
    tar -xzf "$latest_system" -C "$root" 2>/dev/null \
      || log ERROR "System restore failed"
  fi

  # Restore content
  if latest_content=$(_latest_valid_backup "$CONTENT_BACKUP_DIR" 'content_*.tar.gz'); then
    log INFO "Restoring content from $latest_content"
    tar -xzf "$latest_content" -C "$root" 2>/dev/null \
      || log ERROR "Content restore failed"
  fi

  # Restore database
  if latest_db=$(_latest_valid_backup "$CONTENT_BACKUP_DIR" 'content_*.sql.gz'); then
    log INFO "Restoring database from $latest_db"
    local mysql_cmd="mysql"
    if command -v mariadb &>/dev/null; then
      mysql_cmd="mariadb"
    fi
    # pipefail inside the subshell: a decompression error must count as a
    # failure even when the SQL client itself exits 0.
    if ! ( set -o pipefail
           gzip -dc -- "$latest_db" | "$mysql_cmd" 2>/dev/null ); then
      log ERROR "Database restore failed"
    fi
  fi

  return 0
}
|
|
|
|
# ──────────────────────────────────────────────
# Status
# ──────────────────────────────────────────────

#######################################
# Print the summary of one backup set: count, newest archive and its
# modification time. A missing directory is reported as an empty set.
# Arguments: $1 - heading, $2 - directory, $3 - archive glob,
#            $4 - retention note shown in parentheses after the count
# Outputs:   the summary block on stdout, followed by a blank line
#######################################
_status_backup_set() {
  local title="$1" dir="$2" pattern="$3" retain_note="$4"
  local -a found=()
  local f

  # $pattern is left unquoted on purpose so that it globs.
  for f in "$dir"/$pattern; do
    if [[ -f "$f" ]]; then
      found+=("$f")
    fi
  done

  echo "${title} (${dir}):"
  echo " Count: ${#found[@]} (${retain_note})"
  if (( ${#found[@]} > 0 )); then
    # Glob results are sorted and names embed the timestamp: last = newest.
    local latest="${found[${#found[@]}-1]}"
    echo " Latest: $latest"
    echo " Timestamp: $(find "$latest" -maxdepth 0 -printf '%T+' 2>/dev/null || true)"
  else
    echo " Latest: (none)"
  fi
  echo ""
}

#######################################
# Show the safe-point state, both backup sets and their disk usage.
# Safe to run before any backup exists and before the backup directories
# have been created.
# Globals:   SAFEPOINT_FILE, SYSTEM_BACKUP_DIR, SYSTEM_BACKUP_RETAIN,
#            CONTENT_BACKUP_DIR, CONTENT_BACKUP_RETAIN_HOURS (read)
# Outputs:   human-readable report on stdout
# Returns:   0
#######################################
cmd_status() {
  echo "=== Moko Server Auto-Heal Status ==="
  echo ""

  # Safe point
  if has_safepoint; then
    echo "Safe point: SET"
    sed 's/^/ /' "$SAFEPOINT_FILE"
  else
    echo "Safe point: NOT SET (will auto-heal on next boot)"
  fi
  echo ""

  # System backups
  _status_backup_set "System backups" "$SYSTEM_BACKUP_DIR" \
    'system_*.tar.gz' "retain $SYSTEM_BACKUP_RETAIN"

  # Content backups
  _status_backup_set "Content backups" "$CONTENT_BACKUP_DIR" \
    'content_*.tar.gz' "retain ${CONTENT_BACKUP_RETAIN_HOURS}h"

  # Disk usage. Only existing directories are measured: with `pipefail`, a
  # du error on a missing one would fail the whole status command.
  echo "Backup disk usage:"
  local d
  for d in "$SYSTEM_BACKUP_DIR" "$CONTENT_BACKUP_DIR"; do
    if [[ -d "$d" ]]; then
      du -sh -- "$d" 2>/dev/null | sed 's/^/ /' || true
    fi
  done

  return 0
}
|
|
|
|
# ──────────────────────────────────────────────
# Install helper — sets up cron + systemd
# ──────────────────────────────────────────────

#######################################
# Install the script's integration points:
#   - an example config at CONF_FILE (only if none exists),
#   - /etc/cron.d/moko-autoheal with the boot-check, backup and cleanup jobs,
#   - moko-safepoint.service, whose ExecStop writes the safe point when the
#     system shuts down.
# Globals:   CONF_FILE, LOG_FILE, LOCK_DIR (read)
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success, 1 when not run as root
#######################################
cmd_install() {
  if [[ $EUID -ne 0 ]]; then
    echo "install must be run as root" >&2
    return 1
  fi

  local script_path
  script_path=$(readlink -f "$0")

  echo "Installing Moko Auto-Heal..."

  # Create config directory, plus the backup/state directories so that
  # `status` and the shutdown hook work before the first backup has run.
  mkdir -p /etc/moko "$(dirname "$LOG_FILE")" "$LOCK_DIR"
  ensure_dirs

  # Write example config if none exists
  if [[ ! -f "$CONF_FILE" ]]; then
    cat > "$CONF_FILE" <<'CONF'
# /etc/moko/autoheal.conf — Server auto-heal configuration
# Uncomment and modify as needed

# BACKUP_ROOT="/var/backups/moko"
# SAFEPOINT_FILE="/var/run/moko/safepoint"
# LOG_FILE="/var/log/moko/autoheal.log"

# System backup paths (space-separated)
# SYSTEM_PATHS="/etc/nginx /etc/php /etc/mysql /etc/cron.d /etc/systemd/system"

# Content backup paths (space-separated)
# CONTENT_PATHS="/var/www"

# Database names (space-separated, empty = auto-detect all)
# DB_NAMES=""

# Retention
# SYSTEM_BACKUP_RETAIN=7 # daily backups to keep
# CONTENT_BACKUP_RETAIN_HOURS=24 # hours of content backups to keep
CONF
    echo " Created config: $CONF_FILE"
  fi

  # Install cron jobs
  local cron_file="/etc/cron.d/moko-autoheal"
  cat > "$cron_file" <<CRON
# Moko Server Auto-Heal — managed by server-autoheal.sh install
SHELL=/bin/bash
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Boot check — auto-heal if no safe point
@reboot root ${script_path} boot-check

# System backup — daily at 3:00 AM
0 3 * * * root ${script_path} backup-system

# Content backup — every 2 hours
0 */2 * * * root ${script_path} backup-content

# Cleanup expired backups — 30 min after each content backup
30 */2 * * * root ${script_path} cleanup
CRON
  echo " Installed cron: $cron_file"

  # Install shutdown hook to set safe point on clean shutdown.
  #
  # The unit is a started-and-idle oneshot (RemainAfterExit=yes) whose only
  # real work is ExecStop. It relies on systemd's DEFAULT dependencies: they
  # add Conflicts=shutdown.target and Before=shutdown.target, and that
  # conflict is what makes systemd stop the unit — and so run ExecStop —
  # during shutdown. DefaultDependencies=no would remove it.
  if ! command -v systemctl &>/dev/null; then
    echo " systemctl not found — skipping systemd shutdown hook" >&2
  else
    local shutdown_hook="/etc/systemd/system/moko-safepoint.service"
    cat > "$shutdown_hook" <<UNIT
[Unit]
Description=Moko Safe Point — mark clean shutdown

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/true
ExecStop=${script_path} set-safepoint

[Install]
WantedBy=multi-user.target
UNIT
    systemctl daemon-reload
    # --now also starts the unit. ExecStop only runs for a unit that is
    # active, so without it the first shutdown after installation would
    # write no safe point and the next boot would be treated as a crash.
    systemctl enable --now moko-safepoint.service
    echo " Installed systemd hook: $shutdown_hook"
  fi

  echo ""
  echo "Done! Edit $CONF_FILE to configure paths for your server."
  echo "Run '${script_path} status' to verify."
}
|
|
|
|
# ──────────────────────────────────────────────
# Main dispatcher
# ──────────────────────────────────────────────

#######################################
# Dispatch the first command-line argument to its cmd_* handler.
# Arguments: $1 - command name; defaults to "help" when omitted
# Outputs:   help text on stdout; "Unknown command" diagnostics on stderr
# Returns:   the handler's status; exits 1 on an unknown command
#######################################
main() {
  local cmd="${1:-help}"

  case "$cmd" in
    boot-check) cmd_boot_check ;;
    set-safepoint) cmd_set_safepoint ;;
    clear-safepoint) cmd_clear_safepoint ;;
    backup-system) cmd_backup_system ;;
    backup-content) cmd_backup_content ;;
    cleanup) cmd_cleanup ;;
    status) cmd_status ;;
    install) cmd_install ;;
    help | --help | -h)
      # Print the script's header comment: from line 2 up to the first
      # empty line, with the leading "# " removed.
      sed -n '2,/^$/s/^# //p' "$0"
      echo ""
      echo "Commands: boot-check, set-safepoint, clear-safepoint,"
      echo " backup-system, backup-content, cleanup, status, install"
      ;;
    *)
      echo "Unknown command: $cmd" >&2
      echo "Run '$0 help' for usage" >&2
      exit 1
      ;;
  esac
}

main "$@"
|