Use timeout to check for services status
author Giulio Fidente <gfidente@redhat.com>
Fri, 18 Dec 2015 18:02:19 +0000 (19:02 +0100)
committer Jiri Stransky <jistr@redhat.com>
Tue, 26 Jan 2016 14:21:24 +0000 (15:21 +0100)
Replaces the bash loop with the timeout command in the piloted
cluster restart to minimize downtime.

Change-Id: I9067eed9626ae5aff833d7a9a9ad1e1a6c026327
Co-Authored-By: Jiri Stransky <jistr@redhat.com>
extraconfig/tasks/pacemaker_resource_restart.sh

index 1220109..dfc335b 100755 (executable)
@@ -7,11 +7,14 @@ check_interval=3
 
 function check_resource {
 
+  if [ "$#" -ne 3 ]; then
+      echo "ERROR: check_resource function expects 3 parameters, $# given" | tee /dev/fd/2
+      exit 1
+  fi
+
   service=$1
   state=$2
   timeout=$3
-  tstart=$(date +%s)
-  tend=$(( $tstart + $timeout ))
 
   if [ "$state" = "stopped" ]; then
       match_for_incomplete='Started'
@@ -19,20 +22,18 @@ function check_resource {
       match_for_incomplete='Stopped'
   fi
 
-  while (( $(date +%s) < $tend )); do
+  if timeout -k 10 $timeout crm_resource --wait; then
       node_states=$(pcs status --full | grep "$service" | grep -v Clone)
       if echo "$node_states" | grep -q "$match_for_incomplete"; then
-          echo "$service not yet $state, sleeping $check_interval seconds."
-          sleep $check_interval
+          echo "ERROR: cluster settled but $service was not in $state state, exiting." | tee /dev/fd/2
+          exit 1
       else
         echo "$service has $state"
-        timeout -k 10 $timeout crm_resource --wait
-        return
       fi
-  done
-
-  echo "$service never $state after $timeout seconds" | tee /dev/fd/2
-  exit 1
+  else
+      echo "ERROR: cluster remained unstable for more than $timeout seconds, exiting." | tee /dev/fd/2
+      exit 1
+  fi
 
 }