X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=extraconfig%2Ftasks%2Fyum_update.sh;h=a2a04e8e4134c94d88e5c61496aec26f2c3c260c;hb=26d7023a07c12df582145b9ff68c6bdce60fb31a;hp=4c87373e5cedc49711212a8f9f736055129ef5f3;hpb=09af40f5dbbdd335005640b538ed043dda75e77b;p=apex-tripleo-heat-templates.git diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index 4c87373e..a2a04e8e 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -38,38 +38,53 @@ if [[ -a "$timestamp_file" ]]; then fi touch "$timestamp_file" -command_arguments=${command_arguments:-} - -list_updates=$(yum list updates) - -if [[ "$list_updates" == "" ]]; then - echo "No packages require updating" - exit 0 -fi - pacemaker_status="" -if hiera -c /etc/puppet/hiera.yaml service_names | grep -q pacemaker; then +# We include word boundaries in order to not match pacemaker_remote +if hiera -c /etc/puppet/hiera.yaml service_names | grep -q '\bpacemaker\b'; then pacemaker_status=$(systemctl is-active pacemaker) fi -# Fix the redis/rabbit resource start/stop timeouts. See https://bugs.launchpad.net/tripleo/+bug/1633455 -# and https://bugs.launchpad.net/tripleo/+bug/1634851 +# (NB: when backporting this s/pacemaker_short_bootstrap_node_name/bootstrap_nodeid) +# This runs before the yum_update so we are guaranteed to run it even in the absence +# of packages to update (the check for -z "$update_identifier" guarantees that this +# is run only on overcloud stack update -i) if [[ "$pacemaker_status" == "active" && \ - "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]] ; then - if pcs resource show rabbitmq | grep -E "start.*timeout=100"; then - pcs resource update rabbitmq op start timeout=200s - fi - if pcs resource show rabbitmq | grep -E "stop.*timeout=90"; then - pcs resource update rabbitmq op stop timeout=200s - fi - if pcs resource show redis | grep -E "start.*timeout=120"; then - pcs resource update redis op start timeout=200s - fi - if pcs resource show redis | grep -E "stop.*timeout=120"; then - pcs resource update redis op stop timeout=200s + "$(hiera -c /etc/puppet/hiera.yaml pacemaker_short_bootstrap_node_name | tr '[:upper:]' '[:lower:]')" == "$(facter hostname | tr '[:upper:]' '[:lower:]')" ]] ; then \ + # OCF scripts don't cope with -eu + echo "Verifying if we need to fix up any IPv6 VIPs" + set +eu + fixup_wrong_ipv6_vip + ret=$? + set -eu + if [ $ret -ne 0 ]; then + echo "Fixing up IPv6 VIPs failed. Stopping here. (See https://bugs.launchpad.net/tripleo/+bug/1686357 for more info)" + exit 1 fi fi +command_arguments=${command_arguments:-} + +# Always ensure yum has full cache +yum makecache || echo "Yum makecache failed. This can cause failure later on." + +# yum check-update exits 100 if updates are available +set +e +check_update=$(yum check-update 2>&1) +check_update_exit=$? +set -e + +if [[ "$check_update_exit" == "1" ]]; then + echo "Failed to check for package updates" + echo "$check_update" + exit 1 +elif [[ "$check_update_exit" != "100" ]]; then + echo "No packages require updating" + exit 0 +fi + +# special case https://bugs.launchpad.net/tripleo/+bug/1635205 +bug/1669714 +special_case_ovs_upgrade_if_needed + if [[ "$pacemaker_status" == "active" ]] ; then echo "Pacemaker running, stopping cluster node and doing full package update" node_count=$(pcs status xml | grep -o "" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*") @@ -81,6 +96,7 @@ if [[ "$pacemaker_status" == "active" ]] ; then fi else echo "Upgrading openstack-puppet-modules and its dependencies" + check_for_yum_lock yum -q -y update openstack-puppet-modules yum deplist openstack-puppet-modules | awk '/dependency/{print $2}' | xargs yum -q -y update echo "Upgrading other packages is handled by config management tooling" @@ -90,24 +106,14 @@ fi command=${command:-update} full_command="yum -q -y $command $command_arguments" -echo "Running: $full_command" +echo "Running: $full_command" +check_for_yum_lock result=$($full_command) return_code=$? echo "$result" echo "yum return code: $return_code" -# Writes any changes caused by alterations to os-net-config and bounces the -# interfaces *before* restarting the cluster. -os-net-config -c /etc/os-net-config/config.json -v --detailed-exit-codes -RETVAL=$? -if [[ $RETVAL == 2 ]]; then - echo "os-net-config: interface configuration files updated successfully" -elif [[ $RETVAL != 0 ]]; then - echo "ERROR: os-net-config configuration failed" - exit $RETVAL -fi - if [[ "$pacemaker_status" == "active" ]] ; then echo "Starting cluster node" pcs cluster start @@ -124,15 +130,19 @@ if [[ "$pacemaker_status" == "active" ]] ; then fi done - tstart=$(date +%s) - while ! clustercheck; do - sleep 5 - tnow=$(date +%s) - if (( tnow-tstart > galera_sync_timeout )) ; then - echo "ERROR galera sync timed out" - exit 1 - fi - done + RETVAL=$( pcs resource show galera-master | grep wsrep_cluster_address | grep -q `crm_node -n` ; echo $? ) + + if [[ $RETVAL -eq 0 && -e /etc/sysconfig/clustercheck ]]; then + tstart=$(date +%s) + while ! clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo "ERROR galera sync timed out" + exit 1 + fi + done + fi echo "Waiting for pacemaker cluster to settle" if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then @@ -143,6 +153,7 @@ if [[ "$pacemaker_status" == "active" ]] ; then pcs status fi -echo "Finished yum_update.sh on server $deploy_server_id at `date`" + +echo "Finished yum_update.sh on server $deploy_server_id at `date` with return code: $return_code" exit $return_code