extraconfig/tasks/yum_update.sh

   1 #!/bin/bash
   2
   3 # A heat-config-script which runs yum update during a stack-update.
   4 # Inputs:
   5 #   deploy_action - yum will only be run if this is UPDATE
   6 #   update_identifier - yum will only run for previously unused values of update_identifier
   7 #   command - yum sub-command to run, defaults to "update"
   8 #   command_arguments - yum command arguments, defaults to ""
   9
  10 echo "Started yum_update.sh on server $deploy_server_id at `date`"
  11 echo -n "false" > $heat_outputs_path.update_managed_packages
  12
  13 if [[ -z "$update_identifier" ]]; then
  14     echo "Not running due to unset update_identifier"
  15     exit 0
  16 fi
  17
  18 timestamp_dir=/var/lib/overcloud-yum-update
  19 mkdir -p $timestamp_dir
  20
  21 # sanitise to remove unusual characters
  22 update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
  23
  24 # seconds to wait for this node to rejoin the cluster after update
  25 cluster_start_timeout=600
  26 galera_sync_timeout=1800
  27 cluster_settle_timeout=1800
  28
  29 timestamp_file="$timestamp_dir/$update_identifier"
  30 if [[ -a "$timestamp_file" ]]; then
  31     echo "Not running for already-run timestamp \"$update_identifier\""
  32     exit 0
  33 fi
  34 touch "$timestamp_file"
  35
  36 command_arguments=${command_arguments:-}
  37
  38 list_updates=$(yum list updates)
  39
  40 if [[ "$list_updates" == "" ]]; then
  41     echo "No packages require updating"
  42     exit 0
  43 fi
  44
  45 pacemaker_status=$(systemctl is-active pacemaker)
  46
  47 # Fix the redis/rabbit resource start/stop timeouts. See https://bugs.launchpad.net/tripleo/+bug/1633455
  48 # and https://bugs.launchpad.net/tripleo/+bug/1634851
  49 if [[ "$pacemaker_status" == "active" && \
  50       "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]] ; then
  51     if pcs resource show rabbitmq | grep -E "start.*timeout=100"; then
  52         pcs resource update rabbitmq op start timeout=200s
  53     fi
  54     if pcs resource show rabbitmq | grep -E "stop.*timeout=90"; then
  55         pcs resource update rabbitmq op stop timeout=200s
  56     fi
  57     if pcs resource show redis | grep -E "start.*timeout=120"; then
  58         pcs resource update redis op start timeout=200s
  59     fi
  60     if pcs resource show redis | grep -E "stop.*timeout=120"; then
  61         pcs resource update redis op stop timeout=200s
  62     fi
  63 fi
  64
  65
  66 if [[ "$pacemaker_status" == "active" ]] ; then
  67     echo "Pacemaker running, stopping cluster node and doing full package update"
  68     node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
  69     if [[ "$node_count" == "1" ]] ; then
  70         echo "Active node count is 1, stopping node with --force"
  71         pcs cluster stop --force
  72     else
  73         pcs cluster stop
  74     fi
  75 else
  76     echo "Upgrading openstack-puppet-modules and its dependencies"
  77     yum -q -y update openstack-puppet-modules
  78     yum deplist openstack-puppet-modules | awk '/dependency/{print $2}' | xargs yum -q -y update
  79     echo "Upgrading other packages is handled by config management tooling"
  80     echo -n "true" > $heat_outputs_path.update_managed_packages
  81     exit 0
  82 fi
  83
  84 # Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
  85 if [[ -n $(rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart") ]]; then
  86     echo "Manual upgrade of openvswitch - restart in postun detected"
  87     mkdir OVS_UPGRADE || true
  88     pushd OVS_UPGRADE
  89     echo "Attempting to downloading latest openvswitch with yumdownloader"
  90     yumdownloader --resolve openvswitch
  91     echo "Updating openvswitch with nopostun option"
  92     rpm -U --replacepkgs --nopostun ./*.rpm
  93     popd
  94 else
  95     echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
  96 fi
  97
  98 command=${command:-update}
  99 full_command="yum -q -y $command $command_arguments"
 100 echo "Running: $full_command"
 101
 102 result=$($full_command)
 103 return_code=$?
 104 echo "$result"
 105 echo "yum return code: $return_code"
 106
 107 if [[ "$pacemaker_status" == "active" ]] ; then
 108     echo "Starting cluster node"
 109     pcs cluster start
 110
 111     hostname=$(hostname -s)
 112     tstart=$(date +%s)
 113     while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
 114         sleep 5
 115         tnow=$(date +%s)
 116         if (( tnow-tstart > cluster_start_timeout )) ; then
 117             echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
 118             pcs status
 119             exit 1
 120         fi
 121     done
 122
 123     tstart=$(date +%s)
 124     while ! clustercheck; do
 125         sleep 5
 126         tnow=$(date +%s)
 127         if (( tnow-tstart > galera_sync_timeout )) ; then
 128             echo "ERROR galera sync timed out"
 129             exit 1
 130         fi
 131     done
 132
 133     echo "Waiting for pacemaker cluster to settle"
 134     if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
 135         echo "ERROR timed out while waiting for the cluster to settle"
 136         exit 1
 137     fi
 138
 139     pcs status
 140 fi
 141
 142 echo "Finished yum_update.sh on server $deploy_server_id at `date`"
 143
 144 exit $return_code