extraconfig/tasks/yum_update.sh

   1 #!/bin/bash
   2
   3 # A heat-config-script which runs yum update during a stack-update.
   4 # Inputs:
   5 #   deploy_action - yum will only be run if this is UPDATE
   6 #   update_identifier - yum will only run for previously unused values of update_identifier
   7 #   command - yum sub-command to run, defaults to "update"
   8 #   command_arguments - yum command arguments, defaults to ""
   9
  10 echo "Started yum_update.sh on server $deploy_server_id at `date`"
  11 echo -n "false" > $heat_outputs_path.update_managed_packages
  12
  13 if [[ -z "$update_identifier" ]]; then
  14     echo "Not running due to unset update_identifier"
  15     exit 0
  16 fi
  17
  18 timestamp_dir=/var/lib/overcloud-yum-update
  19 mkdir -p $timestamp_dir
  20
  21 # sanitise to remove unusual characters
  22 update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
  23
  24 # seconds to wait for this node to rejoin the cluster after update
  25 cluster_start_timeout=600
  26 galera_sync_timeout=360
  27 cluster_settle_timeout=1800
  28
  29 timestamp_file="$timestamp_dir/$update_identifier"
  30 if [[ -a "$timestamp_file" ]]; then
  31     echo "Not running for already-run timestamp \"$update_identifier\""
  32     exit 0
  33 fi
  34 touch "$timestamp_file"
  35
  36 command_arguments=${command_arguments:-}
  37
  38 list_updates=$(yum list updates)
  39
  40 if [[ "$list_updates" == "" ]]; then
  41     echo "No packages require updating"
  42     exit 0
  43 fi
  44
  45 pacemaker_status=$(systemctl is-active pacemaker)
  46 pacemaker_dumpfile=$(mktemp)
  47
  48 if [[ "$pacemaker_status" == "active" ]] ; then
  49 SERVICES="memcached
  50 httpd
  51 neutron-dhcp-agent
  52 neutron-l3-agent
  53 neutron-metadata-agent
  54 neutron-openvswitch-agent
  55 neutron-server
  56 openstack-ceilometer-api
  57 openstack-ceilometer-central
  58 openstack-ceilometer-collector
  59 openstack-ceilometer-notification
  60 openstack-aodh-evaluator
  61 openstack-aodh-notifier
  62 openstack-aodh-listener
  63 openstack-cinder-api
  64 openstack-cinder-scheduler
  65 openstack-cinder-volume
  66 openstack-glance-api
  67 openstack-glance-registry
  68 openstack-heat-api
  69 openstack-heat-api-cfn
  70 openstack-heat-api-cloudwatch
  71 openstack-heat-engine
  72 openstack-keystone
  73 openstack-nova-api
  74 openstack-nova-conductor
  75 openstack-nova-consoleauth
  76 openstack-nova-novncproxy
  77 openstack-nova-scheduler"
  78
  79     echo "Dumping Pacemaker config"
  80     pcs cluster cib $pacemaker_dumpfile
  81
  82     echo "Checking for missing constraints"
  83
  84     if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
  85         pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
  86     fi
  87
  88     if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
  89         pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
  90     fi
  91
  92     if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
  93         pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
  94     fi
  95
  96     if pcs resource | grep "haproxy-clone"; then
  97         SERVICES="$SERVICES haproxy"
  98         if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
  99             pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
 100         fi
 101     fi
 102
 103     if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
 104         pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
 105     fi
 106
 107     if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
 108         pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
 109     fi
 110
 111     if ! pcs constraint order show | grep "promote redis-master then start openstack-aodh-evaluator-clone"; then
 112         pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-aodh-evaluator-clone require-all=false
 113     fi
 114     # ensure neutron constraints https://review.openstack.org/#/c/229466
 115     # remove ovs-cleanup after server and add openvswitch-agent instead
 116     if  pcs constraint order show  | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
 117         pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
 118     fi
 119     if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then
 120         pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone
 121     fi
 122
 123
 124     if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
 125         pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
 126     fi
 127
 128     echo "Setting resource start/stop timeouts"
 129     for service in $SERVICES; do
 130         pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s
 131     done
 132     # mongod start timeout is higher, setting only stop timeout
 133     pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op  stop timeout=200s
 134
 135     echo "Making sure rabbitmq has the notify=true meta parameter"
 136     pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true
 137
 138     echo "Applying new Pacemaker config"
 139     if ! pcs cluster cib-push $pacemaker_dumpfile; then
 140         echo "ERROR failed to apply new pacemaker config"
 141         exit 1
 142     fi
 143
 144     echo "Pacemaker running, stopping cluster node and doing full package update"
 145     node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
 146     if [[ "$node_count" == "1" ]] ; then
 147         echo "Active node count is 1, stopping node with --force"
 148         pcs cluster stop --force
 149     else
 150         pcs cluster stop
 151     fi
 152
 153     # clean leftover keepalived and radvd instances from neutron
 154     # (can be removed when we remove neutron-netns-cleanup from cluster services)
 155     # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init
 156     killall neutron-keepalived-state-change 2>/dev/null || :
 157     kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || :
 158     kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || :
 159 else
 160     echo "Upgrading openstack-puppet-modules"
 161     yum -q -y update openstack-puppet-modules
 162     echo "Upgrading other packages is handled by config management tooling"
 163     echo -n "true" > $heat_outputs_path.update_managed_packages
 164     exit 0
 165 fi
 166
 167 command=${command:-update}
 168 full_command="yum -q -y $command $command_arguments"
 169 echo "Running: $full_command"
 170
 171 result=$($full_command)
 172 return_code=$?
 173 echo "$result"
 174 echo "yum return code: $return_code"
 175
 176 if [[ "$pacemaker_status" == "active" ]] ; then
 177     echo "Starting cluster node"
 178     pcs cluster start
 179
 180     hostname=$(hostname -s)
 181     tstart=$(date +%s)
 182     while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
 183         sleep 5
 184         tnow=$(date +%s)
 185         if (( tnow-tstart > cluster_start_timeout )) ; then
 186             echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
 187             pcs status
 188             exit 1
 189         fi
 190     done
 191
 192     tstart=$(date +%s)
 193     while ! clustercheck; do
 194         sleep 5
 195         tnow=$(date +%s)
 196         if (( tnow-tstart > galera_sync_timeout )) ; then
 197             echo "ERROR galera sync timed out"
 198             exit 1
 199         fi
 200     done
 201
 202     echo "Waiting for pacemaker cluster to settle"
 203     if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
 204         echo "ERROR timed out while waiting for the cluster to settle"
 205         exit 1
 206     fi
 207
 208     pcs status
 209 fi
 210
 211 echo "Finished yum_update.sh on server $deploy_server_id at `date`"
 212
 213 exit $return_code