#!/bin/bash
#
# extraconfig/tasks/yum_update.sh
#
# A heat-config-script which runs yum update during a stack-update.
#
# Inputs (read from the environment):
#   deploy_action     - documented as "yum will only be run if this is UPDATE".
#                       NOTE(review): this script never reads the variable, so
#                       that filtering presumably happens in the caller -
#                       confirm against the deployment definition.
#   update_identifier - yum will only run for previously unused values of
#                       update_identifier
#   command           - yum sub-command to run, defaults to "update"
#   command_arguments - yum command arguments, defaults to ""
#   deploy_server_id  - only used in the start/finish log lines
#   heat_outputs_path - prefix of the output file described below
#
# Outputs:
#   ${heat_outputs_path}.update_managed_packages
#       "false" is written at start-up; it is overwritten with "true" only
#       after yum has run on a node where pacemaker is not active.
#
# Exit status:
#   0  when the run is skipped (no identifier, identifier already used, or no
#      packages to update)
#   1  when checking for updates fails, or when the node does not rejoin the
#      pacemaker cluster within cluster_start_timeout seconds
#   otherwise the exit status of the yum command itself.
#
# NOTE: "set -e" is deliberately not enabled: the pcs calls below are
# best-effort and the cluster must be restarted even when yum fails.

# directory holding one marker file per update_identifier already processed
readonly timestamp_dir=/var/lib/overcloud-yum-update

# seconds to wait for this node to rejoin the cluster after update
readonly cluster_start_timeout=360

# start/stop timeout applied to the pacemaker resources listed below
readonly resource_op_timeout=100s

# Add a pacemaker ordering constraint unless it is already configured.
#   $1   - text identifying the constraint in "pcs constraint order show"
#   $2.. - arguments passed to "pcs constraint order" to create it
# grep output is left on stdout on purpose so a found constraint is logged.
ensure_order_constraint() {
    local listed_as=$1
    shift
    if ! pcs constraint order show | grep -F -- "$listed_as"; then
        pcs constraint order "$@"
    fi
}

# Set the timeout of one or more operations of a pacemaker resource.
#   $1   - resource name
#   $2.. - operation names ("start" and/or "stop")
set_op_timeouts() {
    local resource=$1
    local operation
    shift
    for operation in "$@"; do
        pcs resource update "$resource" op "$operation" timeout="$resource_op_timeout"
    done
}

echo "Started yum_update.sh on server $deploy_server_id at $(date)"
printf '%s' "false" > "${heat_outputs_path}.update_managed_packages"

if [[ -z "$update_identifier" ]]; then
    echo "Not running due to unset update_identifier"
    exit 0
fi

mkdir -p "$timestamp_dir"

# sanitise to remove unusual characters ('-' is last so it is clearly literal)
update_identifier=${update_identifier//[^a-zA-Z0-9_-]/}

timestamp_file="$timestamp_dir/$update_identifier"
if [[ -e "$timestamp_file" ]]; then
    echo "Not running for already-run timestamp \"$update_identifier\""
    exit 0
fi
# The marker is created before yum runs, so a failed run is not retried for
# the same update_identifier.
touch "$timestamp_file"

# Split the sub-command and its arguments on whitespace into arrays. This is
# the same splitting an unquoted expansion performs, but without pathname
# expansion of arguments such as "--exclude foo*". read returns non-zero at
# end of input with -d '', which is expected here.
read -r -d '' -a yum_command <<<"${command:-update}"
read -r -d '' -a yum_arguments <<<"${command_arguments:-}"

# "yum check-update" exits 100 when updates are available, 0 when there are
# none and 1 on error. Its output always contains header lines, so testing the
# output for emptiness cannot be used to detect "nothing to do".
check_update=$(yum check-update 2>&1)
check_update_exit=$?

if (( check_update_exit == 1 )); then
    echo "Failed to check for package updates"
    echo "$check_update"
    exit 1
elif (( check_update_exit != 100 )); then
    echo "No packages require updating"
    exit 0
fi

pacemaker_status=$(systemctl is-active pacemaker)

if [[ "$pacemaker_status" == "active" ]]; then
    echo "Checking for and adding missing constraints"

    ensure_order_constraint \
        "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone" \
        start openstack-nova-novncproxy-clone then openstack-nova-api-clone

    ensure_order_constraint \
        "start rabbitmq-clone then start openstack-keystone-clone" \
        start rabbitmq-clone then openstack-keystone-clone

    ensure_order_constraint \
        "promote galera-master then start openstack-keystone-clone" \
        promote galera-master then openstack-keystone-clone

    ensure_order_constraint \
        "start haproxy-clone then start openstack-keystone-clone" \
        start haproxy-clone then openstack-keystone-clone

    ensure_order_constraint \
        "start memcached-clone then start openstack-keystone-clone" \
        start memcached-clone then openstack-keystone-clone

    ensure_order_constraint \
        "promote redis-master then start openstack-ceilometer-central-clone" \
        promote redis-master then start openstack-ceilometer-central-clone require-all=false

    if ! pcs resource defaults | grep -F -- "resource-stickiness: INFINITY"; then
        pcs resource defaults resource-stickiness=INFINITY
    fi

    echo "Setting resource start/stop timeouts"

    # timeouts for non-openstack services and special cases
    set_op_timeouts haproxy start stop
    # mongod start timeout is also higher, setting only stop timeout
    set_op_timeouts mongod stop
    # rabbit start timeout is already 100s
    set_op_timeouts rabbitmq stop
    set_op_timeouts memcached start stop
    set_op_timeouts httpd start stop
    # neutron-netns-cleanup stop timeout is 300s, setting only start timeout
    set_op_timeouts neutron-netns-cleanup start
    # neutron-ovs-cleanup stop timeout is 300s, setting only start timeout
    set_op_timeouts neutron-ovs-cleanup start

    # timeouts for openstack services
    openstack_resources=(
        neutron-dhcp-agent
        neutron-l3-agent
        neutron-metadata-agent
        neutron-openvswitch-agent
        neutron-server
        openstack-ceilometer-alarm-evaluator
        openstack-ceilometer-alarm-notifier
        openstack-ceilometer-api
        openstack-ceilometer-central
        openstack-ceilometer-collector
        openstack-ceilometer-notification
        openstack-cinder-api
        openstack-cinder-scheduler
        openstack-cinder-volume
        openstack-glance-api
        openstack-glance-registry
        openstack-heat-api
        openstack-heat-api-cfn
        openstack-heat-api-cloudwatch
        openstack-heat-engine
        openstack-keystone
        openstack-nova-api
        openstack-nova-conductor
        openstack-nova-consoleauth
        openstack-nova-novncproxy
        openstack-nova-scheduler
    )
    for resource in "${openstack_resources[@]}"; do
        set_op_timeouts "$resource" start stop
    done

    echo "Pacemaker running, stopping cluster node and doing full package update"
    # pull the number="N" attribute out of the <nodes_configured .../> element
    node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
    if [[ "$node_count" == "1" ]]; then
        echo "Active node count is 1, stopping node with --force"
        pcs cluster stop --force
    else
        pcs cluster stop
    fi
else
    echo "Excluding upgrading packages that are handled by config management tooling"
    yum_arguments+=(--skip-broken)
    # One --exclude per distinct whitespace-separated entry found in the
    # installed-packages files; a missing directory only yields an error
    # message from cat and no excludes.
    read -r -d '' -a excluded_packages \
        < <(cat /var/lib/tripleo/installed-packages/* | sort -u)
    for exclude in "${excluded_packages[@]}"; do
        yum_arguments+=(--exclude "$exclude")
    done
fi

full_command=(yum -y "${yum_command[@]}" "${yum_arguments[@]}")
echo "Running: ${full_command[*]}"

# yum's stdout is captured and printed once the command has finished; its exit
# status is kept so it can be returned after the cluster has been restarted.
result=$("${full_command[@]}")
return_code=$?
echo "$result"
echo "yum return code: $return_code"

if [[ "$pacemaker_status" == "active" ]]; then
    echo "Starting cluster node"
    pcs cluster start

    hostname=$(hostname -s)
    tstart=$(date +%s)
    # Poll until this node shows up on the "Online" line of "pcs status".
    # -w keeps e.g. "node-1" from matching "node-10"; -o prints nothing for an
    # empty hostname, so that case runs into the timeout instead of passing.
    while [[ -z "$(pcs status | grep "^Online" | grep -F -o -w -- "$hostname")" ]]; do
        sleep 5
        tnow=$(date +%s)
        if (( tnow - tstart > cluster_start_timeout )); then
            echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds"
            pcs status
            exit 1
        fi
    done
    pcs status

else
    printf '%s' "true" > "${heat_outputs_path}.update_managed_packages"
fi

echo "Finished yum_update.sh on server $deploy_server_id at $(date)"

exit $return_code