extraconfig/tasks/yum_update.sh

   1 #!/bin/bash
   2
   3 # A heat-config-script which runs yum update during a stack-update.
   4 # Inputs:
   5 #   deploy_action - yum will only be run if this is UPDATE
   6 #   update_identifier - yum will only run for previously unused values of update_identifier
   7 #   command - yum sub-command to run, defaults to "update"
   8 #   command_arguments - yum command arguments, defaults to ""
   9
  10 echo "Started yum_update.sh on server $deploy_server_id at `date`"
  11 echo -n "false" > $heat_outputs_path.update_managed_packages
  12
  13 if [[ -z "$update_identifier" ]]; then
  14     echo "Not running due to unset update_identifier"
  15     exit 0
  16 fi
  17
  18 timestamp_dir=/var/lib/overcloud-yum-update
  19 mkdir -p $timestamp_dir
  20
  21 # sanitise to remove unusual characters
  22 update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
  23
  24 # seconds to wait for this node to rejoin the cluster after update
  25 cluster_start_timeout=360
  26 galera_sync_timeout=360
  27
  28 timestamp_file="$timestamp_dir/$update_identifier"
  29 if [[ -a "$timestamp_file" ]]; then
  30     echo "Not running for already-run timestamp \"$update_identifier\""
  31     exit 0
  32 fi
  33 touch "$timestamp_file"
  34
  35 command_arguments=${command_arguments:-}
  36
  37 list_updates=$(yum list updates)
  38
  39 if [[ "$list_updates" == "" ]]; then
  40     echo "No packages require updating"
  41     exit 0
  42 fi
  43
  44 pacemaker_status=$(systemctl is-active pacemaker)
  45
  46 if [[ "$pacemaker_status" == "active" ]] ; then
  47     echo "Checking for and adding missing constraints"
  48
  49     if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
  50         pcs constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
  51     fi
  52
  53     if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
  54         pcs constraint order start rabbitmq-clone then openstack-keystone-clone
  55     fi
  56
  57     if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
  58         pcs constraint order promote galera-master then openstack-keystone-clone
  59     fi
  60
  61     if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
  62         pcs constraint order start haproxy-clone then openstack-keystone-clone
  63     fi
  64
  65     if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
  66         pcs constraint order start memcached-clone then openstack-keystone-clone
  67     fi
  68
  69     if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
  70         pcs constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
  71     fi
  72
  73     if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
  74         pcs resource defaults resource-stickiness=INFINITY
  75     fi
  76
  77     echo "Setting resource start/stop timeouts"
  78
  79     # timeouts for non-openstack services and special cases
  80     pcs resource update haproxy op start timeout=100s
  81     pcs resource update haproxy op stop timeout=100s
  82     # mongod start timeout is also higher, setting only stop timeout
  83     pcs resource update mongod op stop timeout=100s
  84     # rabbit start timeout is already 100s
  85     pcs resource update rabbitmq op stop timeout=100s
  86     pcs resource update memcached op start timeout=100s
  87     pcs resource update memcached op stop timeout=100s
  88     pcs resource update httpd op start timeout=100s
  89     pcs resource update httpd op stop timeout=100s
  90     # neutron-netns-cleanup stop timeout is 300s, setting only start timeout
  91     pcs resource update neutron-netns-cleanup op start timeout=100s
  92     # neutron-ovs-cleanup stop timeout is 300s, setting only start timeout
  93     pcs resource update neutron-ovs-cleanup op start timeout=100s
  94
  95     # timeouts for openstack services
  96     pcs resource update neutron-dhcp-agent op start timeout=100s
  97     pcs resource update neutron-dhcp-agent op stop timeout=100s
  98     pcs resource update neutron-l3-agent op start timeout=100s
  99     pcs resource update neutron-l3-agent op stop timeout=100s
 100     pcs resource update neutron-metadata-agent op start timeout=100s
 101     pcs resource update neutron-metadata-agent op stop timeout=100s
 102     pcs resource update neutron-openvswitch-agent op start timeout=100s
 103     pcs resource update neutron-openvswitch-agent op stop timeout=100s
 104     pcs resource update neutron-server op start timeout=100s
 105     pcs resource update neutron-server op stop timeout=100s
 106     pcs resource update openstack-ceilometer-alarm-evaluator op start timeout=100s
 107     pcs resource update openstack-ceilometer-alarm-evaluator op stop timeout=100s
 108     pcs resource update openstack-ceilometer-alarm-notifier op start timeout=100s
 109     pcs resource update openstack-ceilometer-alarm-notifier op stop timeout=100s
 110     pcs resource update openstack-ceilometer-api op start timeout=100s
 111     pcs resource update openstack-ceilometer-api op stop timeout=100s
 112     pcs resource update openstack-ceilometer-central op start timeout=100s
 113     pcs resource update openstack-ceilometer-central op stop timeout=100s
 114     pcs resource update openstack-ceilometer-collector op start timeout=100s
 115     pcs resource update openstack-ceilometer-collector op stop timeout=100s
 116     pcs resource update openstack-ceilometer-notification op start timeout=100s
 117     pcs resource update openstack-ceilometer-notification op stop timeout=100s
 118     pcs resource update openstack-cinder-api op start timeout=100s
 119     pcs resource update openstack-cinder-api op stop timeout=100s
 120     pcs resource update openstack-cinder-scheduler op start timeout=100s
 121     pcs resource update openstack-cinder-scheduler op stop timeout=100s
 122     pcs resource update openstack-cinder-volume op start timeout=100s
 123     pcs resource update openstack-cinder-volume op stop timeout=100s
 124     pcs resource update openstack-glance-api op start timeout=100s
 125     pcs resource update openstack-glance-api op stop timeout=100s
 126     pcs resource update openstack-glance-registry op start timeout=100s
 127     pcs resource update openstack-glance-registry op stop timeout=100s
 128     pcs resource update openstack-heat-api op start timeout=100s
 129     pcs resource update openstack-heat-api op stop timeout=100s
 130     pcs resource update openstack-heat-api-cfn op start timeout=100s
 131     pcs resource update openstack-heat-api-cfn op stop timeout=100s
 132     pcs resource update openstack-heat-api-cloudwatch op start timeout=100s
 133     pcs resource update openstack-heat-api-cloudwatch op stop timeout=100s
 134     pcs resource update openstack-heat-engine op start timeout=100s
 135     pcs resource update openstack-heat-engine op stop timeout=100s
 136     pcs resource update openstack-keystone op start timeout=100s
 137     pcs resource update openstack-keystone op stop timeout=100s
 138     pcs resource update openstack-nova-api op start timeout=100s
 139     pcs resource update openstack-nova-api op stop timeout=100s
 140     pcs resource update openstack-nova-conductor op start timeout=100s
 141     pcs resource update openstack-nova-conductor op stop timeout=100s
 142     pcs resource update openstack-nova-consoleauth op start timeout=100s
 143     pcs resource update openstack-nova-consoleauth op stop timeout=100s
 144     pcs resource update openstack-nova-novncproxy op start timeout=100s
 145     pcs resource update openstack-nova-novncproxy op stop timeout=100s
 146     pcs resource update openstack-nova-scheduler op start timeout=100s
 147     pcs resource update openstack-nova-scheduler op stop timeout=100s
 148
 149     echo "Pacemaker running, stopping cluster node and doing full package update"
 150     node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
 151     if [[ "$node_count" == "1" ]] ; then
 152         echo "Active node count is 1, stopping node with --force"
 153         pcs cluster stop --force
 154     else
 155         pcs cluster stop
 156     fi
 157 else
 158     echo "Excluding upgrading packages that are handled by config management tooling"
 159     command_arguments="$command_arguments --skip-broken"
 160     for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
 161         command_arguments="$command_arguments --exclude $exclude"
 162     done
 163 fi
 164
 165 command=${command:-update}
 166 full_command="yum -y $command $command_arguments"
 167 echo "Running: $full_command"
 168
 169 result=$($full_command)
 170 return_code=$?
 171 echo "$result"
 172 echo "yum return code: $return_code"
 173
 174 if [[ "$pacemaker_status" == "active" ]] ; then
 175     echo "Starting cluster node"
 176     pcs cluster start
 177
 178     hostname=$(hostname -s)
 179     tstart=$(date +%s)
 180     while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
 181         sleep 5
 182         tnow=$(date +%s)
 183         if (( tnow-tstart > cluster_start_timeout )) ; then
 184             echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
 185             pcs status
 186             exit 1
 187         fi
 188     done
 189
 190     tstart=$(date +%s)
 191     while ! clustercheck; do
 192         sleep 5
 193         tnow=$(date +%s)
 194         if (( tnow-tstart > galera_sync_timeout )) ; then
 195             echo "ERROR galera sync timed out"
 196             exit 1
 197         fi
 198     done
 199
 200     pcs status
 201
 202 else
 203     echo -n "true" > $heat_outputs_path.update_managed_packages
 204 fi
 205
 206 echo "Finished yum_update.sh on server $deploy_server_id at `date`"
 207
 208 exit $return_code