Merge "Fix typo in Keystone Sensu subscription"
[apex-tripleo-heat-templates.git] / extraconfig / tasks / yum_update.sh
index 66efc5c..4612f19 100755 (executable)
@@ -23,7 +23,7 @@ update_identifier=${update_identifier//[^a-zA-Z0-9-_]/}
 
 # seconds to wait for this node to rejoin the cluster after update
 cluster_start_timeout=600
-galera_sync_timeout=360
+galera_sync_timeout=1800
 cluster_settle_timeout=1800
 
 timestamp_file="$timestamp_dir/$update_identifier"
@@ -43,104 +43,27 @@ if [[ "$list_updates" == "" ]]; then
 fi
 
 pacemaker_status=$(systemctl is-active pacemaker)
-pacemaker_dumpfile=$(mktemp)
 
-if [[ "$pacemaker_status" == "active" ]] ; then
-SERVICES="memcached
-httpd
-neutron-dhcp-agent
-neutron-l3-agent
-neutron-metadata-agent
-neutron-openvswitch-agent
-neutron-server
-openstack-ceilometer-api
-openstack-ceilometer-central
-openstack-ceilometer-collector
-openstack-ceilometer-notification
-openstack-aodh-evaluator
-openstack-aodh-notifier
-openstack-aodh-listener
-openstack-cinder-api
-openstack-cinder-scheduler
-openstack-cinder-volume
-openstack-glance-api
-openstack-glance-registry
-openstack-heat-api
-openstack-heat-api-cfn
-openstack-heat-api-cloudwatch
-openstack-heat-engine
-openstack-keystone
-openstack-nova-api
-openstack-nova-conductor
-openstack-nova-consoleauth
-openstack-nova-novncproxy
-openstack-nova-scheduler"
-
-    echo "Dumping Pacemaker config"
-    pcs cluster cib $pacemaker_dumpfile
-
-    echo "Checking for missing constraints"
-
-    if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
-    fi
-
-    if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
-    fi
-
-    if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
-    fi
-
-    if pcs resource | grep "haproxy-clone"; then
-        SERVICES="$SERVICES haproxy"
-        if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
-            pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
-        fi
+# Fix the redis/rabbit resource start/stop timeouts. See https://bugs.launchpad.net/tripleo/+bug/1633455
+# and https://bugs.launchpad.net/tripleo/+bug/1634851
+if [[ "$pacemaker_status" == "active" && \
+      "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]] ; then
+    if pcs resource show rabbitmq | grep -E "start.*timeout=100"; then
+        pcs resource update rabbitmq op start timeout=200s
     fi
-
-    if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
+    if pcs resource show rabbitmq | grep -E "stop.*timeout=90"; then
+        pcs resource update rabbitmq op stop timeout=200s
     fi
-
-    if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
+    if pcs resource show redis | grep -E "start.*timeout=120"; then
+        pcs resource update redis op start timeout=200s
     fi
-
-    if ! pcs constraint order show | grep "promote redis-master then start openstack-aodh-evaluator-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-aodh-evaluator-clone require-all=false
+    if pcs resource show redis | grep -E "stop.*timeout=120"; then
+        pcs resource update redis op stop timeout=200s
     fi
-    # ensure neutron constraints https://review.openstack.org/#/c/229466
-    # remove ovs-cleanup after server and add openvswitch-agent instead
-    if  pcs constraint order show  | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
-        pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
-    fi
-    if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone
-    fi
-
-
-    if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
-        pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
-    fi
-
-    echo "Setting resource start/stop timeouts"
-    for service in $SERVICES; do
-        pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s
-    done
-    # mongod start timeout is higher, setting only stop timeout
-    pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op  stop timeout=200s
+fi
 
-    echo "Making sure rabbitmq has the notify=true meta parameter"
-    pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true
-
-    echo "Applying new Pacemaker config"
-    if ! pcs cluster cib-push $pacemaker_dumpfile; then
-        echo "ERROR failed to apply new pacemaker config"
-        exit 1
-    fi
 
+if [[ "$pacemaker_status" == "active" ]] ; then
     echo "Pacemaker running, stopping cluster node and doing full package update"
     node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
     if [[ "$node_count" == "1" ]] ; then
@@ -149,21 +72,29 @@ openstack-nova-scheduler"
     else
         pcs cluster stop
     fi
-
-    # clean leftover keepalived and radvd instances from neutron
-    # (can be removed when we remove neutron-netns-cleanup from cluster services)
-    # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init
-    killall neutron-keepalived-state-change 2>/dev/null || :
-    kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || :
-    kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || :
 else
-    echo "Upgrading openstack-puppet-modules"
+    echo "Upgrading openstack-puppet-modules and its dependencies"
     yum -q -y update openstack-puppet-modules
+    yum deplist openstack-puppet-modules | awk '/dependency/{print $2}' | xargs -r yum -q -y update  # -r: an empty dependency list must not turn into a bare "yum update" of every package
     echo "Upgrading other packages is handled by config management tooling"
     echo -n "true" > $heat_outputs_path.update_managed_packages
     exit 0
 fi
 
+# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205: if the installed package's scriptlets (postuninstall onward) call "systemctl ... try-restart", upgrade it by hand with --nopostun
+if [[ -n $(rpm -q --scripts openvswitch | awk '/postuninstall/,/*/' | grep "systemctl.*try-restart") ]]; then
+    echo "Manual upgrade of openvswitch - restart in postun detected"
+    mkdir -p OVS_UPGRADE  # -p: a directory left behind by an earlier run is reused without an error
+    pushd OVS_UPGRADE || { echo "ERROR failed to enter OVS_UPGRADE"; exit 1; }  # the ./*.rpm install below must never run from another directory
+    echo "Attempting to downloading latest openvswitch with yumdownloader"
+    yumdownloader --resolve openvswitch
+    echo "Updating openvswitch with nopostun option"
+    rpm -U --replacepkgs --nopostun ./*.rpm
+    popd
+else
+    echo "Skipping manual upgrade of openvswitch - no restart in postun detected"
+fi
+
 command=${command:-update}
 full_command="yum -q -y $command $command_arguments"
 echo "Running: $full_command"