Merge "Add Ceilometer API and Collector service to roles_data"

[apex-tripleo-heat-templates.git] / extraconfig / tasks / yum_update.sh
diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh

index 3bf72f1..a2a04e8 100755 (executable)
--- a/extraconfig/tasks/yum_update.sh
+++ b/extraconfig/tasks/yum_update.sh
@@ -38,38 +38,53 @@ if [[ -a "$timestamp_file" ]]; then
  fi
  touch "$timestamp_file"
  
-command_arguments=${command_arguments:-}
-
-list_updates=$(yum list updates)
-
-if [[ "$list_updates" == "" ]]; then
-    echo "No packages require updating"
-    exit 0
-fi
-
  pacemaker_status=""
-if hiera -c /etc/puppet/hiera.yaml service_names | grep -q pacemaker; then
+# We include word boundaries in order to not match pacemaker_remote
+if hiera -c /etc/puppet/hiera.yaml service_names | grep -q '\bpacemaker\b'; then
      pacemaker_status=$(systemctl is-active pacemaker)
  fi
  
-# Fix the redis/rabbit resource start/stop timeouts. See https://bugs.launchpad.net/tripleo/+bug/1633455
-# and https://bugs.launchpad.net/tripleo/+bug/1634851
+# (NB: when backporting this, s/pacemaker_short_bootstrap_node_name/bootstrap_nodeid/)
+# This runs before the yum update so that it is guaranteed to run even when there are
+# no packages to update (the check for -z "$update_identifier" guarantees that this
+# runs only on "overcloud stack update -i").
  if [[ "$pacemaker_status" == "active" && \
-      "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]] ; then
-    if pcs resource show rabbitmq | grep -E "start.*timeout=100"; then
-        pcs resource update rabbitmq op start timeout=200s
-    fi
-    if pcs resource show rabbitmq | grep -E "stop.*timeout=90"; then
-        pcs resource update rabbitmq op stop timeout=200s
-    fi
-    if pcs resource show redis | grep -E "start.*timeout=120"; then
-        pcs resource update redis op start timeout=200s
-    fi
-    if pcs resource show redis | grep -E "stop.*timeout=120"; then
-        pcs resource update redis op stop timeout=200s
+        "$(hiera -c /etc/puppet/hiera.yaml pacemaker_short_bootstrap_node_name | tr '[:upper:]' '[:lower:]')" == "$(facter hostname | tr '[:upper:]' '[:lower:]')" ]] ; then \
+    # OCF scripts don't cope with -eu
+    echo "Verifying if we need to fix up any IPv6 VIPs"
+    set +eu
+    fixup_wrong_ipv6_vip
+    ret=$?
+    set -eu
+    if [ $ret -ne 0 ]; then
+        echo "Fixing up IPv6 VIPs failed. Stopping here. (See https://bugs.launchpad.net/tripleo/+bug/1686357 for more info)"
+        exit 1
      fi
  fi
  
+command_arguments=${command_arguments:-}
+
+# Always ensure yum has a full cache
+yum makecache || echo "Yum makecache failed. This can cause failure later on."
+
+# yum check-update exits 100 if updates are available, 1 on error, and 0 otherwise
+set +e
+check_update=$(yum check-update 2>&1)
+check_update_exit=$?
+set -e
+
+if [[ "$check_update_exit" == "1" ]]; then
+    echo "Failed to check for package updates"
+    echo "$check_update"
+    exit 1
+elif [[ "$check_update_exit" != "100" ]]; then
+    echo "No packages require updating"
+    exit 0
+fi
+
+# Special case: see https://bugs.launchpad.net/tripleo/+bug/1635205 and https://bugs.launchpad.net/tripleo/+bug/1669714
+special_case_ovs_upgrade_if_needed
+
  if [[ "$pacemaker_status" == "active" ]] ; then
      echo "Pacemaker running, stopping cluster node and doing full package update"
      node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
@@ -81,6 +96,7 @@ if [[ "$pacemaker_status" == "active" ]] ; then
      fi
  else
      echo "Upgrading openstack-puppet-modules and its dependencies"
+    check_for_yum_lock
      yum -q -y update openstack-puppet-modules
      yum deplist openstack-puppet-modules | awk '/dependency/{print $2}' | xargs yum -q -y update
      echo "Upgrading other packages is handled by config management tooling"
@@ -90,8 +106,9 @@ fi
  
  command=${command:-update}
  full_command="yum -q -y $command $command_arguments"
-echo "Running: $full_command"
  
+echo "Running: $full_command"
+check_for_yum_lock
  result=$($full_command)
  return_code=$?
  echo "$result"
@@ -113,15 +130,19 @@ if [[ "$pacemaker_status" == "active" ]] ; then
          fi
      done
  
-    tstart=$(date +%s)
-    while ! clustercheck; do
-        sleep 5
-        tnow=$(date +%s)
-        if (( tnow-tstart > galera_sync_timeout )) ; then
-            echo "ERROR galera sync timed out"
-            exit 1
-        fi
-    done
+    RETVAL=$( pcs resource show galera-master | grep wsrep_cluster_address | grep -q `crm_node -n` ; echo $? )
+
+    if [[ $RETVAL -eq 0 && -e /etc/sysconfig/clustercheck ]]; then
+        tstart=$(date +%s)
+        while ! clustercheck; do
+            sleep 5
+            tnow=$(date +%s)
+            if (( tnow-tstart > galera_sync_timeout )) ; then
+                echo "ERROR galera sync timed out"
+                exit 1
+            fi
+        done
+    fi
  
      echo "Waiting for pacemaker cluster to settle"
      if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
@@ -132,6 +153,7 @@ if [[ "$pacemaker_status" == "active" ]] ; then
      pcs status
  fi
  
-echo "Finished yum_update.sh on server $deploy_server_id at `date`"
+
+echo "Finished yum_update.sh on server $deploy_server_id at `date` with return code: $return_code"
  
  exit $return_code