Add pcmk constraints against haproxy-clone only if applicable
[apex-tripleo-heat-templates.git] / extraconfig/tasks/yum_update.sh
#!/bin/bash

# A heat-config-script which runs yum update during a stack-update.
# Inputs:
#   deploy_action - yum will only be run if this is UPDATE
#   update_identifier - yum will only run for previously unused values of update_identifier
#   command - yum sub-command to run, defaults to "update"
#   command_arguments - yum command arguments, defaults to ""
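# Output:
#   update_managed_packages - "true" if yum excluded packages managed by config
#                             management tooling (non-pacemaker nodes), "false" otherwise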

echo "Started yum_update.sh on server $deploy_server_id at `date`"
echo -n "false" > $heat_outputs_path.update_managed_packages

if [[ -z "$update_identifier" ]]; then
    echo "Not running due to unset update_identifier"
    exit 0
fi

timestamp_dir=/var/lib/overcloud-yum-update
mkdir -p $timestamp_dir

# sanitise to remove unusual characters
update_identifier=${update_identifier//[^a-zA-Z0-9_-]/}

# seconds to wait for this node to rejoin the cluster after update
cluster_start_timeout=600
galera_sync_timeout=360

timestamp_file="$timestamp_dir/$update_identifier"
if [[ -a "$timestamp_file" ]]; then
    echo "Not running for already-run timestamp \"$update_identifier\""
    exit 0
fi
touch "$timestamp_file"

command_arguments=${command_arguments:-}

list_updates=$(yum list updates)

if [[ "$list_updates" == "" ]]; then
    echo "No packages require updating"
    exit 0
fi

pacemaker_status=$(systemctl is-active pacemaker)
pacemaker_dumpfile=$(mktemp)

if [[ "$pacemaker_status" == "active" ]] ; then
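# pacemaker-managed services that get explicit start/stop timeouts set further down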
SERVICES="memcached
httpd
neutron-dhcp-agent
neutron-l3-agent
neutron-metadata-agent
neutron-openvswitch-agent
neutron-server
openstack-ceilometer-alarm-evaluator
openstack-ceilometer-alarm-notifier
openstack-ceilometer-api
openstack-ceilometer-central
openstack-ceilometer-collector
openstack-ceilometer-notification
openstack-cinder-api
openstack-cinder-scheduler
openstack-cinder-volume
openstack-glance-api
openstack-glance-registry
openstack-heat-api
openstack-heat-api-cfn
openstack-heat-api-cloudwatch
openstack-heat-engine
openstack-keystone
openstack-nova-api
openstack-nova-conductor
openstack-nova-consoleauth
openstack-nova-novncproxy
openstack-nova-scheduler"

    echo "Dumping Pacemaker config"
    pcs cluster cib $pacemaker_dumpfile

    echo "Checking for missing constraints"

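    # each check below greps the live constraint list and, when an ordering
    # constraint is missing, stages it in $pacemaker_dumpfile; the whole batch
    # is applied later with "pcs cluster cib-push"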
    if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
        pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
    fi

    if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
        pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
    fi

    if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
        pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
    fi

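    # only constrain against haproxy-clone when the resource actually exists in
    # this cluster; haproxy is also appended to SERVICES so it gets the same
    # timeout treatment as the other cluster services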
    if pcs resource | grep "haproxy-clone"; then
        SERVICES="$SERVICES haproxy"
        if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
            pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
        fi
    fi

    if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
        pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
    fi

    if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
    fi

    # ensure neutron constraints https://review.openstack.org/#/c/229466
    # remove ovs-cleanup after server and add openvswitch-agent instead
    if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
        pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
    fi
    if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then
        pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone
    fi

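    # infinite stickiness keeps resources on their current nodes so they are not
    # shuffled around while cluster nodes are taken down for the update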
    if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
        pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
    fi

    echo "Setting resource start/stop timeouts"
    for service in $SERVICES; do
        pcs -f $pacemaker_dumpfile resource update $service op start timeout=100s op stop timeout=100s
    done
    # mongod start timeout is higher, setting only stop timeout
    pcs -f $pacemaker_dumpfile resource update mongod op stop timeout=100s

    echo "Applying new Pacemaker config"
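    # cib-push replaces the live CIB with the modified dump, applying all of the
    # staged constraint and timeout changes at once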
    pcs cluster cib-push $pacemaker_dumpfile

    echo "Pacemaker running, stopping cluster node and doing full package update"
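    # pcs may refuse to stop the only configured node (loss of quorum) unless
    # --force is given, so check the configured node count first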
    node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
    if [[ "$node_count" == "1" ]] ; then
        echo "Active node count is 1, stopping node with --force"
        pcs cluster stop --force
    else
        pcs cluster stop
    fi

    # clean leftover keepalived and radvd instances from neutron
    # (can be removed when we remove neutron-netns-cleanup from cluster services)
    # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init
    killall neutron-keepalived-state-change 2>/dev/null || :
    kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || :
    kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || :
else
    echo "Excluding packages handled by config management tooling from this update"
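    # package lists under /var/lib/tripleo/installed-packages are assumed to be
    # owned by config management (e.g. puppet), so they are excluded here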
    command_arguments="$command_arguments --skip-broken"
    for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do
        command_arguments="$command_arguments --exclude $exclude"
    done
fi

command=${command:-update}
full_command="yum -y $command $command_arguments"
echo "Running: $full_command"

result=$($full_command)
return_code=$?
echo "$result"
echo "yum return code: $return_code"

if [[ "$pacemaker_status" == "active" ]] ; then
    echo "Starting cluster node"
    pcs cluster start

    hostname=$(hostname -s)
    tstart=$(date +%s)
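    # poll "pcs status" until this node is reported Online again, giving up after
    # $cluster_start_timeout seconds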
    while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do
        sleep 5
        tnow=$(date +%s)
        if (( tnow-tstart > cluster_start_timeout )) ; then
            echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
            pcs status
            exit 1
        fi
    done

    tstart=$(date +%s)
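    # clustercheck succeeds once the local galera node reports itself synced;
    # wait up to $galera_sync_timeout seconds for that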
    while ! clustercheck; do
        sleep 5
        tnow=$(date +%s)
        if (( tnow-tstart > galera_sync_timeout )) ; then
            echo "ERROR galera sync timed out"
            exit 1
        fi
    done

    pcs status

else
    echo -n "true" > $heat_outputs_path.update_managed_packages
fi

echo "Finished yum_update.sh on server $deploy_server_id at `date`"

exit $return_code