X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=extraconfig%2Ftasks%2Fyum_update.sh;h=e32369e1b65e776c6140cd3e26464122f8fd6666;hb=f5854e1b5850dc50e51bc36ed4840d58be88d0cf;hp=3d4c772b0d09fd02e3cf694aadc1f994ce5d3ddc;hpb=6cb146cd150400d47a6c19db21d0eef275a07d7b;p=apex-tripleo-heat-templates.git diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index 3d4c772b..e32369e1 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -8,6 +8,7 @@ # command_arguments - yum command arguments, defaults to "" echo "Started yum_update.sh on server $deploy_server_id at `date`" +echo -n "false" > $heat_outputs_path.update_managed_packages if [[ -z "$update_identifier" ]]; then echo "Not running due to unset update_identifier" @@ -20,6 +21,10 @@ mkdir -p $timestamp_dir # sanitise to remove unusual characters update_identifier=${update_identifier//[^a-zA-Z0-9-_]/} +# seconds to wait for this node to rejoin the cluster after update +cluster_start_timeout=600 +galera_sync_timeout=360 + timestamp_file="$timestamp_dir/$update_identifier" if [[ -a "$timestamp_file" ]]; then echo "Not running for already-run timestamp \"$update_identifier\"" @@ -27,6 +32,127 @@ if [[ -a "$timestamp_file" ]]; then fi touch "$timestamp_file" +command_arguments=${command_arguments:-} + +list_updates=$(yum list updates) + +if [[ "$list_updates" == "" ]]; then + echo "No packages require updating" + exit 0 +fi + +pacemaker_status=$(systemctl is-active pacemaker) +pacemaker_dumpfile=$(mktemp) + +if [[ "$pacemaker_status" == "active" ]] ; then +SERVICES="memcached +httpd +neutron-dhcp-agent +neutron-l3-agent +neutron-metadata-agent +neutron-openvswitch-agent +neutron-server +openstack-ceilometer-alarm-evaluator +openstack-ceilometer-alarm-notifier +openstack-ceilometer-api +openstack-ceilometer-central +openstack-ceilometer-collector +openstack-ceilometer-notification +openstack-cinder-api +openstack-cinder-scheduler +openstack-cinder-volume +openstack-glance-api +openstack-glance-registry +openstack-heat-api +openstack-heat-api-cfn +openstack-heat-api-cloudwatch +openstack-heat-engine +openstack-keystone +openstack-nova-api +openstack-nova-conductor +openstack-nova-consoleauth +openstack-nova-novncproxy +openstack-nova-scheduler" + + echo "Dumping Pacemaker config" + pcs cluster cib $pacemaker_dumpfile + + echo "Checking for missing constraints" + + if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then + pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone + fi + + if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone + fi + + if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone + fi + + if pcs resource | grep "haproxy-clone"; then + SERVICES="$SERVICES haproxy" + if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone + fi + fi + + if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then + pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone + fi + + if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then + pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false + fi + + # ensure neutron constraints https://review.openstack.org/#/c/229466 + # remove ovs-cleanup after server and add openvswitch-agent instead + if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then + pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory + fi + if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then + pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone + fi + + + if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then + pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY + fi + + echo "Setting resource start/stop timeouts" + for service in $SERVICES; do + pcs -f $pacemaker_dumpfile resource update $service op start timeout=100s op stop timeout=100s + done + # mongod start timeout is higher, setting only stop timeout + pcs -f $pacemaker_dumpfile resource update mongod op stop timeout=100s + + echo "Applying new Pacemaker config" + pcs cluster cib-push $pacemaker_dumpfile + + echo "Pacemaker running, stopping cluster node and doing full package update" + node_count=$(pcs status xml | grep -o "" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*") + if [[ "$node_count" == "1" ]] ; then + echo "Active node count is 1, stopping node with --force" + pcs cluster stop --force + else + pcs cluster stop + fi + + # clean leftover keepalived and radvd instances from neutron + # (can be removed when we remove neutron-netns-cleanup from cluster services) + # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init + killall neutron-keepalived-state-change 2>/dev/null || : + kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || : + kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : +else + echo "Excluding upgrading packages that are handled by config management tooling" + command_arguments="$command_arguments --skip-broken" + for exclude in $(cat /var/lib/tripleo/installed-packages/* | sort -u); do + command_arguments="$command_arguments --exclude $exclude" + done +fi + command=${command:-update} full_command="yum -y $command $command_arguments" echo "Running: $full_command" @@ -36,6 +162,38 @@ return_code=$? echo "$result" echo "yum return code: $return_code" +if [[ "$pacemaker_status" == "active" ]] ; then + echo "Starting cluster node" + pcs cluster start + + hostname=$(hostname -s) + tstart=$(date +%s) + while [[ "$(pcs status | grep "^Online" | grep -F -o $hostname)" == "" ]]; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_start_timeout )) ; then + echo "ERROR $hostname failed to join cluster in $cluster_start_timeout seconds" + pcs status + exit 1 + fi + done + + tstart=$(date +%s) + while ! clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo "ERROR galera sync timed out" + exit 1 + fi + done + + pcs status + +else + echo -n "true" > $heat_outputs_path.update_managed_packages +fi + echo "Finished yum_update.sh on server $deploy_server_id at `date`" exit $return_code