#!/usr/bin/env bash
# Synex Boot Redundancy (Phase 2 helper)
# Replicates /boot (ext4) and /boot/efi (vfat) across all pool member disks.
#
# Assumptions (UEFI-only for now):
#   diskX1 = EFI (vfat)
#   diskX2 = /boot (ext4)
#   diskX3 = ZFS member partition
#
# Behavior:
#   - status: does NOT mount anything (reports what it sees)
#   - sync: ensures SOURCE mounts exist on /boot and /boot/efi (mounts if needed)
#           then mounts each target under /run/synex-boot-redundancy and:
#             * (optional) mkfs target parts if --reformat
#             * rsync /boot -> target boot
#             * rsync /boot/efi -> target efi
#             * patch target /boot/grub/grub.cfg UUID search lines to match target BOOT UUID
#             * grub-install to target EFI/BOOT (removable)
#
# Usage:
#   synex-boot-redundancy [--dry-run] status
#   synex-boot-redundancy [--dry-run] sync [--reformat] [--all]

# Strict mode: abort on command failure, on unset variables and on a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Run-mode switches; the argument parser at the bottom of the file flips them.
DRY_RUN=0
DO_REFORMAT=0
DO_ALL=0

# Parent directory for the temporary target mountpoints, and the log file.
RUNBASE="/run/synex-boot-redundancy"
LOG="/var/log/synex-boot-redundancy.log"

# Make sure the log directory exists, then duplicate everything the script
# prints (stdout and stderr) into the log via tee.
mkdir -p "${LOG%/*}"
exec > >(tee -a "$LOG") 2>&1

# Print an "[ERROR]"-prefixed message on stderr and exit the script with status 1.
die() {
  printf '%s\n' "[ERROR] $*" >&2
  exit 1
}
# Print an "[INFO]"-prefixed message on stdout.
info() {
  printf '%s\n' "[INFO] $*"
}
# Print a "[WARN]"-prefixed message on stdout.
warn() {
  printf '%s\n' "[WARN] $*"
}

# Abort via die() unless the command named in $1 can be resolved by the shell.
require_cmd() {
  local tool="$1"
  if ! command -v "$tool" >/dev/null 2>&1; then
    die "Missing required command: $tool"
  fi
}

# Print the command-line help text (including the current RUNBASE) on stdout.
usage() {
  printf '%s\n' \
    'Usage:' \
    '  synex-boot-redundancy [--dry-run] status' \
    '  synex-boot-redundancy [--dry-run] sync [--reformat] [--all]' \
    '' \
    'Options:' \
    '  --dry-run    Print actions only, do not change anything' \
    '  --reformat   Reformat target EFI+BOOT partitions before copying (DANGEROUS)' \
    '  --all        Also process source disk (normally skipped)' \
    '' \
    'Notes:' \
    '  - status does NOT mount anything' \
    '  - sync ensures SOURCE mounts exist on /boot and /boot/efi' \
    "  - targets mount under: ${RUNBASE}" \
    ''
}

# -----------------------------
# Helpers
# -----------------------------
# Execute the given command line, or only print it (prefixed with
# "[DRY-RUN]") when DRY_RUN is 1. In dry-run mode the status is always 0;
# otherwise it is the status of the executed command.
run() {
  if (( DRY_RUN != 1 )); then
    "$@"
  else
    printf '%s\n' "[DRY-RUN] $*"
  fi
}

# Print what findmnt reports as the SOURCE of mountpoint $1. Errors from
# findmnt are silenced and never propagate: the function always returns 0 and
# prints nothing when findmnt has nothing to report.
mnt_source() {
  local mountpoint="$1"
  findmnt -no SOURCE "$mountpoint" 2>/dev/null || return 0
}

# Print the TYPE value blkid reports for block device $1 (ext4, vfat, xfs, ...).
# blkid failures are swallowed: nothing is printed and the status is still 0.
fs_type() {
  local dev="$1"
  if ! blkid -s TYPE -o value "$dev" 2>/dev/null; then
    return 0
  fi
}

# Print the parent disk of the partition device path given in $1.
#
# Handled forms:
#   <name>-partN                        -> <name>           (/dev/disk/by-* style links)
#   /dev/nvme0n1p3, /dev/mmcblk0p2,
#   /dev/loop0p1, /dev/nbd0p1, /dev/md0p1 -> strip the "pN" suffix
#   /dev/nvme0n1, /dev/mmcblk0, ...     -> unchanged (already a whole disk;
#                                          the trailing digit is part of the disk name)
#   anything else (/dev/sda3, /dev/vdb1) -> strip trailing digits
disk_from_part() {
  local p="$1"
  local link_re='^(.+)-part[0-9]+$'
  local numbered_re='^(/dev/(nvme[0-9]+n[0-9]+|mmcblk[0-9]+|loop[0-9]+|nbd[0-9]+|md[0-9]+))(p[0-9]+)?$'

  if [[ "$p" =~ $link_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  elif [[ "$p" =~ $numbered_re ]]; then
    # Disk names that end in a digit use "p<N>" for partitions; without the
    # suffix the path is already the disk and must not lose its last digit.
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    # ${p##*[!0-9]} is the run of trailing digits (possibly empty).
    printf '%s\n' "${p%"${p##*[!0-9]}"}"
  fi
}

# Print the device path of partition number $2 on disk $1.
#
# Naming rules applied:
#   /dev/disk/...  (udev symlink)   -> "<disk>-part<N>"
#   disk name ending in a digit     -> "<disk>p<N>"  (nvme0n1, mmcblk0, loop0, ...)
#   anything else                   -> "<disk><N>"   (sda, vdb, ...)
part_of_disk() {
  local d="$1" n="$2"
  if [[ "$d" == /dev/disk/* ]]; then
    printf '%s\n' "${d}-part${n}"
  elif [[ "$d" == *[0-9] ]]; then
    # A "p" separator is needed whenever the disk name itself ends in a digit;
    # testing the last character avoids matching "nvme"/"mmcblk" that merely
    # appear somewhere inside a longer path.
    printf '%s\n' "${d}p${n}"
  else
    printf '%s\n' "${d}${n}"
  fi
}

# Print the name of the pool to operate on (possibly empty).
#
# First choice: the pool that owns the dataset whose mountpoint is "/"
# according to `zfs list`. If that yields nothing, fall back to the first
# name printed by `zpool list`. Either tool may be absent.
detect_pool() {
  local name=""

  if command -v zfs >/dev/null 2>&1; then
    # The pool name is the first "/"-separated component of the dataset name.
    name="$(zfs list -H -o name,mountpoint 2>/dev/null \
      | awk '$2 == "/" { split($1, seg, "/"); print seg[1]; exit }')"
  fi

  if [[ -z "$name" ]]; then
    if command -v zpool >/dev/null 2>&1; then
      name="$(zpool list -H -o name 2>/dev/null | awk 'NR == 1' || true)"
    fi
  fi

  printf '%s\n' "$name"
}

# Print the leaf vdev device paths of pool $1, one per line, sorted and unique.
#
# `zpool status -LP` is asked first so that vdevs are reported as resolved,
# absolute device paths; if that invocation fails, plain `zpool status` is
# used and bare names are prefixed with /dev/.
#
# Only rows of the "config:" table are considered. The NAME header, the pool's
# own row and grouping vdevs (mirror-N, raidzN-N, ...) are skipped explicitly,
# so a pool whose name ends in a digit is no longer reported as a device.
# Returns non-zero when zpool itself fails (pipefail); callers use `|| true`.
pool_member_parts() {
  local pool="$1"
  {
    zpool status -LP "$pool" 2>/dev/null \
      || zpool status "$pool" 2>/dev/null
  } \
    | awk -v pool="$pool" '
        $1 == "config:" { in_cfg = 1; next }
        $1 == "errors:" { in_cfg = 0 }
        # Section headings (logs, cache, spares, ...) are single-field rows.
        !in_cfg || NF < 2 { next }
        $1 == "NAME" || $1 == pool { next }
        $1 ~ /^(mirror|raidz|draid|spare|replacing|special|dedup|logs|cache)/ { next }
        $1 ~ /^\/dev\// { print $1; next }
        $1 ~ /^[a-zA-Z].*[0-9]$/ { print "/dev/" $1 }
      ' \
    | sort -u
}

# Print the parent disks of all member partitions of pool $1, one per line,
# sorted and de-duplicated. Each line from pool_member_parts is mapped through
# disk_from_part; a failure of pool_member_parts is tolerated.
pool_member_disks() {
  local member
  while IFS= read -r member || [[ -n "$member" ]]; do
    disk_from_part "$member"
  done < <(pool_member_parts "$1" || true) | sort -u
}

# Succeed when the efivars directory exists under /sys/firmware/efi, which the
# script takes as the sign that the system was booted through UEFI.
is_uefi() {
  local efivars_dir=/sys/firmware/efi/efivars
  test -d "$efivars_dir"
}

# Print the EFI system partition of disk $1, which by the script's layout
# convention is partition number 1.
detect_efi_part() {
  local disk="$1"
  part_of_disk "$disk" 1
}
# Print the /boot partition of disk $1, which by the script's layout
# convention is partition number 2.
detect_boot_part() {
  local disk="$1"
  part_of_disk "$disk" 2
}

# Make sure the SOURCE /boot (ext4) and /boot/efi (vfat) are mounted, mounting
# partitions 2 and 1 of disk $1 when they are not, then verify that /boot
# contains grub/ and grub/grub.cfg.
#
# Arguments: $1 - source disk used when a mount is missing
# Output:    progress messages only (nothing is meant to be captured)
# Exits via die() when the sanity checks fail.
#
# In dry-run mode the mounts are only printed, so the sanity checks would
# inspect an unmounted /boot; they are skipped with a warning in that case.
ensure_source_mounts() {
  local src_disk="$1"

  local src_boot_part src_efi_part
  src_boot_part="$(detect_boot_part "$src_disk")"
  src_efi_part="$(detect_efi_part "$src_disk")"

  local src_boot_dev src_efi_dev
  src_boot_dev="$(mnt_source /boot)"
  src_efi_dev="$(mnt_source /boot/efi)"

  # Set when a mount was needed but only simulated because of --dry-run.
  local simulated=0

  if [[ -z "$src_boot_dev" ]]; then
    info "Source /boot is not mounted. Mounting ${src_boot_part} -> /boot"
    run mkdir -p /boot
    run mount -t ext4 "$src_boot_part" /boot
    if [[ $DRY_RUN -eq 1 ]]; then
      simulated=1
    fi
  else
    info "Source /boot mounted: $src_boot_dev"
  fi

  if [[ -z "$src_efi_dev" ]]; then
    info "Source /boot/efi is not mounted. Mounting ${src_efi_part} -> /boot/efi"
    run mkdir -p /boot/efi
    run mount -t vfat "$src_efi_part" /boot/efi
    if [[ $DRY_RUN -eq 1 ]]; then
      simulated=1
    fi
  else
    info "Source /boot/efi mounted: $src_efi_dev"
  fi

  if [[ $simulated -eq 1 ]]; then
    warn "Dry-run: source mounts were only simulated; skipping /boot/grub sanity checks."
    return 0
  fi

  # sanity
  if [[ ! -d /boot/grub ]]; then
    die "/boot/grub not found. Source /boot is not a valid Synex boot partition mount."
  fi
  if [[ ! -f /boot/grub/grub.cfg ]]; then
    warn "Source /boot/grub/grub.cfg not found."
    warn "This indicates /boot was NOT mounted during grub generation."
    warn "Fix (one time): mount /boot and /boot/efi, then run:"
    warn "  update-initramfs -u -k all && update-grub"
    die "Cannot proceed without source /boot/grub/grub.cfg."
  fi
}

# Create fresh filesystems on a target disk's boot partitions (via run, so
# dry-run only prints the commands):
#   $1 - EFI partition,  formatted FAT32 with label "EFI"
#   $2 - BOOT partition, formatted ext4 (forced) with label "boot"
reformat_target_parts() {
  local efi_part="$1" boot_part="$2"
  local -a make_esp=(mkfs.vfat -F 32 -n EFI "$efi_part")
  local -a make_boot=(mkfs.ext4 -F -L boot "$boot_part")

  warn "REFORMATTING target partitions: EFI=$efi_part BOOT=$boot_part"
  run "${make_esp[@]}"
  run "${make_boot[@]}"
}

# Verify that a target's partitions already carry the expected filesystems.
#
#   $1 - target EFI partition  (must be vfat)
#   $2 - target BOOT partition (must be ext4)
#
# Nothing is checked when --reformat is active (DO_REFORMAT=1). Otherwise the
# BOOT partition is judged first, then EFI; any mismatch ends the script via
# die() with a hint to run the first replication with --reformat.
ensure_target_ready_or_die() {
  local tgt_efi="$1" tgt_boot="$2"

  if (( DO_REFORMAT == 1 )); then
    return 0
  fi

  local boot_fs efi_fs
  boot_fs="$(fs_type "$tgt_boot")"
  efi_fs="$(fs_type "$tgt_efi")"

  case "$boot_fs" in
    ext4) ;;
    "") die "Target BOOT $tgt_boot has NO filesystem. First run needs: synex-boot-redundancy sync --reformat" ;;
    *) die "Target BOOT $tgt_boot is '$boot_fs' (expected ext4). Use: synex-boot-redundancy sync --reformat" ;;
  esac

  case "$efi_fs" in
    vfat) ;;
    "") die "Target EFI $tgt_efi has NO filesystem. First run needs: synex-boot-redundancy sync --reformat" ;;
    *) die "Target EFI $tgt_efi is '$efi_fs' (expected vfat). Use: synex-boot-redundancy sync --reformat" ;;
  esac
}

# Rewrite the filesystem UUID in the target grub.cfg "search" lines so that
# they point at the target BOOT partition instead of the source one.
#
#   $1 - directory where the target boot filesystem is mounted
#   $2 - target BOOT partition (its UUID is read with blkid)
#
# Both forms of the search line are handled:
#   search --no-floppy --fs-uuid --set=root <uuid>
#   search --no-floppy --fs-uuid --set=root --hint-...=... [--hint-...] <uuid>
# The UUID must start with a hex digit so that the leading "--" of a --hint
# option can never be mistaken for (part of) the UUID.
#
# Exits via die() when the UUID or the cfg is missing. In dry-run mode nothing
# was mounted, copied or formatted, so those checks are skipped and only the
# command is printed (with a placeholder if no UUID is available).
patch_grub_uuid() {
  local target_boot_mnt="$1"
  local tgt_boot_part="$2"
  local cfg="${target_boot_mnt}/grub/grub.cfg"

  local uuid_new
  uuid_new="$(blkid -s UUID -o value "$tgt_boot_part" 2>/dev/null || true)"

  if [[ ${DRY_RUN:-0} -eq 1 ]]; then
    uuid_new="${uuid_new:-TARGET-BOOT-UUID}"
  else
    [[ -n "$uuid_new" ]] || die "Cannot get UUID for $tgt_boot_part"
    [[ -f "$cfg" ]] || die "Missing target grub.cfg: $cfg"
  fi

  # The value is spliced into a sed program; refuse anything unexpected.
  [[ "$uuid_new" =~ ^[0-9A-Za-z-]+$ ]] \
    || die "Unexpected UUID format for $tgt_boot_part: $uuid_new"

  info "Patching grub.cfg search UUID -> $uuid_new"

  # Zero or more "--hint..." options may sit between --set=root and the UUID.
  local hints='(--hint[^[:space:]]*[[:space:]]+)*'
  local expr="s/(search --no-floppy --fs-uuid --set=root[[:space:]]+${hints})[0-9a-fA-F][0-9a-fA-F-]*/\\1${uuid_new}/g"
  run sed -i -E "$expr" "$cfg"
}

# Install GRUB for x86_64 UEFI onto a mounted target ESP, using the mounted
# target boot filesystem as GRUB's boot directory.
#
#   $1 - mountpoint of the target EFI system partition
#   $2 - mountpoint of the target boot filesystem
#
# grub-install is run with --removable and --no-nvram. Afterwards the fallback
# loader EFI/BOOT/BOOTX64.EFI is guaranteed to exist: only when grub-install
# did not leave one there is EFI/Synex/grubx64.efi copied into place. An
# existing BOOTX64.EFI is never overwritten, because the Synex image on the
# target ESP may simply be the copy rsynced from the source disk.
install_grub_target() {
  local tgt_efi_mnt="$1" tgt_boot_mnt="$2"
  local fallback_dir="$tgt_efi_mnt/EFI/BOOT"
  local fallback_img="$fallback_dir/BOOTX64.EFI"
  local vendor_img="$tgt_efi_mnt/EFI/Synex/grubx64.efi"

  info "Installing GRUB to target (EFI removable) using:"
  info "  --efi-directory=$tgt_efi_mnt"
  info "  --boot-directory=$tgt_boot_mnt"

  run grub-install \
    --target=x86_64-efi \
    --efi-directory="$tgt_efi_mnt" \
    --boot-directory="$tgt_boot_mnt" \
    --bootloader-id=Synex \
    --no-nvram \
    --removable \
    --recheck

  # Ensure fallback path exists
  run mkdir -p "$fallback_dir"

  # Fill in the fallback loader only if it is missing.
  if [[ ! -f "$fallback_img" && -f "$vendor_img" ]]; then
    run cp -f "$vendor_img" "$fallback_img"
  fi

  run sync
}

# Best-effort lazy unmount of the per-target mountpoints (inner ESP first).
_unmount_sync_targets() {
  run umount -lf "$RUNBASE/tgt-boot/efi" || true
  run umount -lf "$RUNBASE/tgt-boot" || true
}

# Replicate the source /boot and /boot/efi onto one target disk.
#
#   $1 - pool name (currently unused, kept for the caller's signature)
#   $2 - source disk (the disk backing the mounted /boot)
#   $3 - target disk
#
# Steps: locate target partitions 1 (EFI) and 2 (BOOT), optionally reformat
# them (or verify their filesystem types), mount them under
# $RUNBASE/tgt-boot[/efi], rsync both trees, patch the target grub.cfg UUID,
# run grub-install, unmount.
#
# Safety details:
#   - the source disk itself is never reformatted, even with --all --reformat,
#     since its partitions are the mounted copy source;
#   - an EXIT trap unmounts the targets if any step aborts the script, and is
#     removed again once the disk has been processed.
sync_one_disk() {
  local pool="$1" src_disk="$2" tgt_disk="$3"
  local boot_mnt="$RUNBASE/tgt-boot"
  local efi_mnt="$boot_mnt/efi"

  local tgt_efi tgt_boot
  tgt_efi="$(detect_efi_part "$tgt_disk")"
  tgt_boot="$(detect_boot_part "$tgt_disk")"

  info "----------------------------------------"
  info "Target disk: $tgt_disk"
  info "  EFI : $tgt_efi"
  info "  BOOT: $tgt_boot"
  info "----------------------------------------"

  [[ -b "$tgt_efi"  ]] || die "Missing target EFI partition: $tgt_efi"
  [[ -b "$tgt_boot" ]] || die "Missing target BOOT partition: $tgt_boot"

  if [[ $DO_REFORMAT -eq 1 ]]; then
    if [[ "$tgt_disk" == "$src_disk" ]]; then
      warn "Not reformatting $tgt_disk: it is the source disk and its boot partitions are in use."
    else
      reformat_target_parts "$tgt_efi" "$tgt_boot"
    fi
  else
    # Otherwise, enforce expected FS types for clean errors.
    ensure_target_ready_or_die "$tgt_efi" "$tgt_boot"
  fi

  run mkdir -p "$boot_mnt"

  # Drop mounts left behind by an earlier aborted run (same paths as below).
  _unmount_sync_targets 2>/dev/null

  # From here on, never leave the target partitions mounted on failure.
  trap _unmount_sync_targets EXIT

  run mount -t ext4 "$tgt_boot" "$boot_mnt"
  run mkdir -p "$efi_mnt"
  run mount -t vfat "$tgt_efi" "$efi_mnt"

  # Copy BOOT. The ESP mounted at /boot/efi is excluded here and copied
  # separately below with options suited to vfat.
  info "Rsync BOOT: /boot -> $boot_mnt"
  run rsync -aHAX --numeric-ids --delete --exclude=/efi/ /boot/ "$boot_mnt/"

  # Copy EFI. vfat cannot store owners, permission bits, ACLs or xattrs, so
  # only contents and timestamps are transferred.
  info "Rsync EFI: /boot/efi -> $efi_mnt"
  run rsync -rt --modify-window=1 --delete /boot/efi/ "$efi_mnt/"

  # Patch grub UUID on target /boot
  patch_grub_uuid "$boot_mnt" "$tgt_boot"

  # Install GRUB to target disk's EFI with boot-directory at target boot
  install_grub_target "$efi_mnt" "$boot_mnt"

  # Normal completion: disarm the trap and unmount explicitly.
  trap - EXIT
  _unmount_sync_targets

  info "Done: $tgt_disk"
}

# -----------------------------
# Commands
# -----------------------------
# "status" command: report the detected pool, its member partitions and disks,
# and the current state of the source /boot and /boot/efi mounts.
# Nothing is mounted or modified here. Exits via die() when a required tool is
# missing or no pool can be detected.
cmd_status() {
  require_cmd zpool
  require_cmd zfs
  require_cmd blkid
  require_cmd findmnt

  local pool
  pool="$(detect_pool)"
  [[ -n "$pool" ]] || die "Cannot detect pool"

  info "Detected pool: $pool"

  # Keep the working arrays and loop variables out of the global scope.
  local -a parts=() disks=()
  local p d

  info "Pool member partitions:"
  mapfile -t parts < <(pool_member_parts "$pool" || true)
  for p in "${parts[@]}"; do
    echo "  - $p"
  done

  info "Pool member disks:"
  mapfile -t disks < <(pool_member_disks "$pool" || true)
  for d in "${disks[@]}"; do
    echo "  - $d"
  done

  # Source mounts visibility only (do not mount in status)
  local sb se
  sb="$(mnt_source /boot)"
  se="$(mnt_source /boot/efi)"

  if [[ -n "$sb" ]]; then
    info "Source /boot mount: $sb"
    findmnt /boot || true
  else
    warn "Source /boot is NOT mounted."
  fi

  if [[ -n "$se" ]]; then
    info "Source /boot/efi mount: $se"
    findmnt /boot/efi || true
  else
    warn "Source /boot/efi is NOT mounted."
  fi

  if [[ ! -f /boot/grub/grub.cfg ]]; then
    warn "MISSING: /boot/grub/grub.cfg"
  fi
  # compgen -G succeeds only if the glob matches at least one path.
  if ! compgen -G '/boot/vmlinuz-*' >/dev/null; then
    warn "No /boot/vmlinuz-* found. This suggests /boot is not mounted in runtime."
  fi

  # The hint follows the same rule cmd_sync uses when /boot is not mounted:
  # the first detected pool disk. /dev/vda is kept as a last-resort example.
  local hint_disk="${disks[0]:-/dev/vda}"
  info "Hint: expected SOURCE partitions (Phase 1 disk=${hint_disk}):"
  info "  /boot     <- $(detect_boot_part "$hint_disk")"
  info "  /boot/efi <- $(detect_efi_part "$hint_disk")"
}

# "sync" command: replicate the source /boot and /boot/efi to every pool
# member disk.
#
# Flow: check required tools and UEFI, detect the pool and its disks, pick the
# source disk (the one backing the mounted /boot, else the first pool disk),
# make sure the source is mounted, then run sync_one_disk for each disk. The
# source disk is skipped unless --all was given.
#
# Exits via die() on any missing prerequisite. In dry-run mode the source
# mounts are only simulated, so the initially chosen source disk is kept
# instead of failing on the post-mount re-detection.
cmd_sync() {
  require_cmd zpool
  require_cmd zfs
  require_cmd blkid
  require_cmd findmnt
  require_cmd mount
  require_cmd umount
  require_cmd rsync
  require_cmd grub-install
  require_cmd sed
  if [[ $DO_REFORMAT -eq 1 ]]; then
    require_cmd mkfs.vfat
    require_cmd mkfs.ext4
  fi

  is_uefi || die "UEFI not detected. This script currently supports UEFI only."

  local pool
  pool="$(detect_pool)"
  [[ -n "$pool" ]] || die "Cannot detect pool"
  info "Using pool: $pool"

  local -a disks=()
  local d
  mapfile -t disks < <(pool_member_disks "$pool" || true)
  [[ "${#disks[@]}" -gt 0 ]] || die "No pool member disks detected."

  # Determine SOURCE disk as the disk that backs mounted /boot (or fallback to first disk)
  local src_boot_dev src_disk
  src_boot_dev="$(mnt_source /boot)"
  if [[ -n "$src_boot_dev" ]]; then
    src_disk="$(disk_from_part "$src_boot_dev")"
  else
    # fallback (Phase 1)
    src_disk="${disks[0]}"
  fi

  # Ensure source mounts exist for copy
  ensure_source_mounts "$src_disk"

  # Re-detect source boot device now that mounts are ensured
  src_boot_dev="$(mnt_source /boot)"
  if [[ -n "$src_boot_dev" ]]; then
    src_disk="$(disk_from_part "$src_boot_dev")"
  elif [[ $DRY_RUN -eq 1 ]]; then
    # Nothing was really mounted; continue with the disk chosen above.
    src_boot_dev="$(detect_boot_part "$src_disk")"
    warn "Dry-run: /boot was not actually mounted; assuming source boot device $src_boot_dev"
  else
    die "Cannot resolve /boot mount source after mounting."
  fi

  info "Source boot device: $src_boot_dev (disk=$src_disk)"

  run mkdir -p "$RUNBASE"

  for d in "${disks[@]}"; do
    if [[ "$d" == "$src_disk" && $DO_ALL -eq 0 ]]; then
      info "Skipping source disk (use --all to include): $d"
      continue
    fi
    sync_one_disk "$pool" "$src_disk" "$d"
  done

  info "All done."
}

# -----------------------------
# Arg parsing
# -----------------------------
# Single pass over the command line. Flags may appear before or after the
# command word; the first "status"/"sync" becomes the command, and anything
# else (including a second command word) is rejected.
cmd=""

while (( $# > 0 )); do
  case "$1" in
    --dry-run)
      DRY_RUN=1
      ;;
    --reformat)
      DO_REFORMAT=1
      ;;
    --all)
      DO_ALL=1
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    status|sync)
      if [[ -n "$cmd" ]]; then
        die "Unknown argument: $1"
      fi
      cmd="$1"
      ;;
    *)
      die "Unknown argument: $1"
      ;;
  esac
  shift
done

# No command given: show the help text and fail.
if [[ -z "$cmd" ]]; then
  usage
  exit 1
fi

# Dispatch to the selected command.
case "$cmd" in
  status) cmd_status ;;
  sync)   cmd_sync ;;
  *) die "Unknown command: $cmd" ;;
esac
