Posted in

OpenShift – Cluster Graceful Shutdown Script v2

I noticed the last script did not cater for the scenario that the node is an SNO. What happened to my SNO was that many pods were not scheduled and I lost access to the console and api. The only way I could recover it was to get into the SNO via SSH, and uncordon the node using the kubeconfig stored within.

In this case, I’ve done an upgrade to the script. Enjoy the script!

Script
#!/bin/bash
# OpenShift 4.18 Graceful Shutdown Script for Air-gapped Environment
# Compatible with RHEL 9.6
# Script to gracefully shutdown OpenShift cluster
# SNO-safe: skips cordon/drain on Single Node OpenShift and ensures node is uncordoned before shutdown

set -euo pipefail

# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Logging functions
log()   { echo -e "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} $1"; }
warn()  { echo -e "${YELLOW}[$(date '+%Y-%m-%d %H:%M:%S')] WARNING:${NC} $1"; }
error() { echo -e "${RED}[$(date '+%Y-%m-%d %H:%M:%S')] ERROR:${NC} $1"; exit 1; }

# Confirmation prompt
confirm() {
  read -p "$1 [y/N]: " -r
  echo
  [[ ${REPLY:-} =~ ^[Yy]$ ]]
}

# Detect Single Node OpenShift
is_sno() {
  local count
  count="$(oc get nodes --no-headers 2>/dev/null | wc -l | tr -d ' ')"
  [[ "$count" -eq 1 ]]
}

# Check prerequisites
check_prerequisites() {
  log "Checking prerequisites..."

  # Check if oc command is available
  if ! command -v oc &> /dev/null; then
    error "OpenShift CLI (oc) not found. Please install and configure oc command."
  fi

  # jq used for JSON processing when present (optional but recommended)
  if ! command -v jq &> /dev/null; then
    warn "jq not found. Some advanced filtering will be skipped."
  fi

  # Check if we have cluster-admin access
  if ! oc auth can-i '*' '*' --all-namespaces &> /dev/null; then
    error "You need cluster-admin privileges to run this script."
  fi

  # Check if cluster is accessible
  if ! oc get nodes &> /dev/null; then
    error "Cannot access the cluster. Please check your connection and authentication."
  fi

  log "Prerequisites check passed."
}

# Check certificate expiration
check_certificate_expiry() {
  log "Checking certificate expiration date..."

  cert_expiry="$(oc -n openshift-kube-apiserver-operator get secret kube-apiserver-to-kubelet-signer \
    -o jsonpath='{.metadata.annotations.auth\.openshift\.io/certificate-not-after}' 2>/dev/null || echo "")"

  if [[ -n "$cert_expiry" ]]; then
    log "Certificate expires on: $cert_expiry"
    warn "Ensure the cluster is restarted before certificate expiry to avoid issues."
  else
    warn "Could not retrieve certificate expiry information."
  fi
}

# Take etcd backup (confirmation gate)
take_etcd_backup() {
  log "Checking for recent etcd backup..."
  warn "It is strongly recommended to take an etcd backup before shutdown."
  warn "Please refer to the OpenShift documentation for etcd backup procedures."

  if ! confirm "Do you confirm that you have taken a recent etcd backup?"; then
    error "Please take an etcd backup before proceeding with cluster shutdown."
  fi
}

# Cordoning and draining worker nodes (SNO-safe)
drain_worker_nodes() {
  log "Cordoning and draining worker nodes..."

  if is_sno; then
    warn "Single Node OpenShift detected. Skipping cordon/drain to keep the node schedulable."
    return 0
  fi

  # Get nodes labeled as worker
  worker_nodes="$(oc get nodes -l node-role.kubernetes.io/worker -o jsonpath='{.items[*].metadata.name}' 2>/dev/null || echo "")"
  if [[ -z "$worker_nodes" ]]; then
    warn "No dedicated worker nodes found. This might be a compact cluster. Skipping drain."
    return 0
  fi

  # Exclude nodes that are also control-plane/master if jq is available
  filtered_workers=""
  if command -v jq &>/dev/null; then
    for n in $worker_nodes; do
      if ! oc get node "$n" -o json \
        | jq -e '.metadata.labels["node-role.kubernetes.io/master"] or .metadata.labels["node-role.kubernetes.io/control-plane"]' >/dev/null; then
        filtered_workers+="$n "
      fi
    done
  else
    # Fallback without jq (best effort): leave list as-is
    filtered_workers="$worker_nodes"
  fi

  if [[ -z "$filtered_workers" ]]; then
    warn "No dedicated workers after filtering; skipping drain."
    return 0
  fi

  # Cordon all filtered workers
  for node in $filtered_workers; do
    log "Cordoning node: $node"
    oc adm cordon "$node" || warn "Failed to cordon $node"
  done

  # Drain all filtered workers
  for node in $filtered_workers; do
    log "Draining node: $node"
    oc adm drain "$node" --delete-emptydir-data --ignore-daemonsets=true --timeout=15s --force \
      || warn "Failed to drain $node completely"
  done

  log "Dedicated worker nodes cordoned and drained."
}

# Handle special workloads (if ODF/Ceph is present)
handle_special_workloads() {
  log "Checking for OpenShift Data Foundation (ODF) workloads..."

  if oc get namespace openshift-storage &>/dev/null; then
    warn "OpenShift Data Foundation detected. Special shutdown sequence may be required."

    if command -v jq &>/dev/null; then
      odf_pvcs="$(oc get pvc -A -o json \
        | jq -r '.items[] | select(.spec.storageClassName // "" | test("ocs-storagecluster-ceph")) | .metadata.namespace + "/" + .metadata.name' 2>/dev/null || echo "")"

      if [[ -n "$odf_pvcs" ]]; then
        log "Found workloads using ODF storage. Shutting down pods using ODF storage..."
        while IFS= read -r pvc; do
          ns="${pvc%%/*}"
          claim="${pvc##*/}"
          pods="$(oc get pods -n "$ns" -o json \
            | jq -r --arg claim "$claim" '.items[] | select(.spec.volumes[]? | select(.persistentVolumeClaim.claimName == $claim)) | .metadata.name' 2>/dev/null || echo "")"
          for pod in $pods; do
            log "Deleting pod $pod in namespace $ns (using ODF storage)"
            oc delete pod "$pod" -n "$ns" --grace-period=30 || warn "Failed to delete pod $pod"
          done
        done <<< "$odf_pvcs"
      fi
    else
      warn "jq not available; skipping fine-grained ODF PVC/pod cleanup."
    fi
  fi
}

# Ensure SNO node is schedulable (uncordon + clear taints)
ensure_sno_schedulable() {
  if is_sno; then
    NODE="$(oc get node -o jsonpath='{.items[0].metadata.name}')"
    log "Ensuring SNO node is uncordoned before shutdown: $NODE"
    oc adm uncordon "$NODE" || warn "Failed to uncordon SNO node $NODE"
    # Clear common control-plane NoSchedule taints if present
    oc adm taint nodes "$NODE" node-role.kubernetes.io/master- || true
    oc adm taint nodes "$NODE" node-role.kubernetes.io/control-plane- || true
  fi
}

# Shutdown cluster nodes
shutdown_nodes() {
  local shutdown_delay="${1:-1}"

  log "Shutting down all cluster nodes with ${shutdown_delay} minute delay..."

  all_nodes="$(oc get nodes -o jsonpath='{.items[*].metadata.name}')"
  if [[ -z "$all_nodes" ]]; then
    error "No nodes found in the cluster."
  fi

  node_count="$(echo "$all_nodes" | wc -w)"
  if [[ $node_count -ge 10 ]] && [[ $shutdown_delay -eq 1 ]]; then
    shutdown_delay=10
    warn "Large cluster detected ($node_count nodes). Using ${shutdown_delay} minute shutdown delay."
  fi

  for node in $all_nodes; do
    log "Scheduling shutdown for node: $node"
    oc debug node/"$node" -- chroot /host shutdown -h "$shutdown_delay" || warn "Failed to schedule shutdown for $node"
  done

  log "Shutdown scheduled for all nodes with ${shutdown_delay} minute delay."
}

# Main shutdown function
main() {
  log "Starting OpenShift 4.18 cluster graceful shutdown..."

  log "Cluster information:"
  oc get nodes -o wide | head -10

  # Final confirmation
  if ! confirm "Are you sure you want to proceed with cluster shutdown?"; then
    log "Shutdown cancelled by user."
    exit 0
  fi

  # Execute shutdown steps
  check_prerequisites
  check_certificate_expiry
  take_etcd_backup
  handle_special_workloads
  drain_worker_nodes

  # Ask for custom shutdown delay
  read -p "Enter shutdown delay in minutes (default: 1, recommended: 5 for large clusters): " -r custom_delay || true
  delay="${custom_delay:-1}"

  # Validate delay is a number
  if ! [[ "$delay" =~ ^[0-9]+$ ]]; then
    warn "Invalid delay specified. Using default delay of 1 minute."
    delay=1
  fi

  # Safety net for SNO: ensure node is schedulable before power-off
  ensure_sno_schedulable

  shutdown_nodes "$delay"

  log "Cluster shutdown initiated successfully!"
  log "All nodes will shut down in $delay minute(s)."
  log ""
  log "IMPORTANT REMINDERS:"
  log "1. For SNO, this script ensures the node is UN-CORDONED and taints cleared before shutdown."
  log "2. For multi-node clusters, remember to uncordon drained workers when restarting:"
  log "   oc adm uncordon <node-name>"
  log "3. Check cluster certificate expiry before restart"
  log "4. Ensure etcd backup is available if recovery is needed"
  log ""
  log "For cluster restart procedures, refer to OpenShift documentation."
}

# Execute main function
main "$@"

Leave a Reply

Your email address will not be published. Required fields are marked *