Merge "Don't start all services during upgrade steps"
[apex-tripleo-heat-templates.git] / extraconfig / tasks / yum_update.sh
index f3c3b4b..74af7b0 100755 (executable)
@@ -43,104 +43,29 @@ if [[ "$list_updates" == "" ]]; then
 fi
 
 pacemaker_status=$(systemctl is-active pacemaker)
-pacemaker_dumpfile=$(mktemp)
 
-if [[ "$pacemaker_status" == "active" ]] ; then
-SERVICES="memcached
-httpd
-neutron-dhcp-agent
-neutron-l3-agent
-neutron-metadata-agent
-neutron-openvswitch-agent
-neutron-server
-openstack-ceilometer-api
-openstack-ceilometer-central
-openstack-ceilometer-collector
-openstack-ceilometer-notification
-openstack-aodh-evaluator
-openstack-aodh-notifier
-openstack-aodh-listener
-openstack-cinder-api
-openstack-cinder-scheduler
-openstack-cinder-volume
-openstack-glance-api
-openstack-glance-registry
-openstack-heat-api
-openstack-heat-api-cfn
-openstack-heat-api-cloudwatch
-openstack-heat-engine
-openstack-keystone
-openstack-nova-api
-openstack-nova-conductor
-openstack-nova-consoleauth
-openstack-nova-novncproxy
-openstack-nova-scheduler"
-
-    echo "Dumping Pacemaker config"
-    pcs cluster cib $pacemaker_dumpfile
-
-    echo "Checking for missing constraints"
-
-    if ! pcs constraint order show | grep "start openstack-nova-novncproxy-clone then start openstack-nova-api-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start openstack-nova-novncproxy-clone then openstack-nova-api-clone
-    fi
-
-    if ! pcs constraint order show | grep "start rabbitmq-clone then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start rabbitmq-clone then openstack-keystone-clone
-    fi
-
-    if ! pcs constraint order show | grep "promote galera-master then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote galera-master then openstack-keystone-clone
-    fi
-
-    if pcs resource | grep "haproxy-clone"; then
-        SERVICES="$SERVICES haproxy"
-        if ! pcs constraint order show | grep "start haproxy-clone then start openstack-keystone-clone"; then
-            pcs -f $pacemaker_dumpfile constraint order start haproxy-clone then openstack-keystone-clone
-        fi
-    fi
-
-    if ! pcs constraint order show | grep "start memcached-clone then start openstack-keystone-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start memcached-clone then openstack-keystone-clone
-    fi
-
-    if ! pcs constraint order show | grep "promote redis-master then start openstack-ceilometer-central-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false
-    fi
-
-    if ! pcs constraint order show | grep "promote redis-master then start openstack-aodh-evaluator-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-aodh-evaluator-clone require-all=false
+# Fix the redis/rabbit resource start/stop timeouts. See https://bugs.launchpad.net/tripleo/+bug/1633455
+# and https://bugs.launchpad.net/tripleo/+bug/1634851
+if [[ "$pacemaker_status" == "active" && \
+      "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]] ; then
+    if pcs resource show rabbitmq | grep -E "start.*timeout=100"; then
+        pcs resource update rabbitmq op start timeout=200s
     fi
-    # ensure neutron constraints https://review.openstack.org/#/c/229466
-    # remove ovs-cleanup after server and add openvswitch-agent instead
-    if  pcs constraint order show  | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then
-        pcs -f $pacemaker_dumpfile constraint remove order-neutron-server-clone-neutron-ovs-cleanup-clone-mandatory
+    if pcs resource show rabbitmq | grep -E "stop.*timeout=90"; then
+        pcs resource update rabbitmq op stop timeout=200s
     fi
-    if ! pcs constraint order show | grep "start neutron-server-clone then start neutron-openvswitch-agent-clone"; then
-        pcs -f $pacemaker_dumpfile constraint order start neutron-server-clone then neutron-openvswitch-agent-clone
+    if pcs resource show redis | grep -E "start.*timeout=120"; then
+        pcs resource update redis op start timeout=200s
     fi
-
-
-    if ! pcs resource defaults | grep "resource-stickiness: INFINITY"; then
-        pcs -f $pacemaker_dumpfile resource defaults resource-stickiness=INFINITY
+    if pcs resource show redis | grep -E "stop.*timeout=120"; then
+        pcs resource update redis op stop timeout=200s
     fi
+fi
 
-    echo "Setting resource start/stop timeouts"
-    for service in $SERVICES; do
-        pcs -f $pacemaker_dumpfile resource update $service op start timeout=200s op stop timeout=200s
-    done
-    # mongod start timeout is higher, setting only stop timeout
-    pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op  stop timeout=200s
-
-    echo "Making sure rabbitmq has the notify=true meta parameter"
-    pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true
-
-    echo "Applying new Pacemaker config"
-    if ! pcs cluster cib-push $pacemaker_dumpfile; then
-        echo "ERROR failed to apply new pacemaker config"
-        exit 1
-    fi
+# Special-case OVS for https://bugs.launchpad.net/tripleo/+bug/1635205
+special_case_ovs_upgrade_if_needed
 
+if [[ "$pacemaker_status" == "active" ]] ; then
     echo "Pacemaker running, stopping cluster node and doing full package update"
     node_count=$(pcs status xml | grep -o "<nodes_configured.*/>" | grep -o 'number="[0-9]*"' | grep -o "[0-9]*")
     if [[ "$node_count" == "1" ]] ; then
@@ -149,16 +74,10 @@ openstack-nova-scheduler"
     else
         pcs cluster stop
     fi
-
-    # clean leftover keepalived and radvd instances from neutron
-    # (can be removed when we remove neutron-netns-cleanup from cluster services)
-    # see https://review.gerrithub.io/#/c/248931/1/neutron-netns-cleanup.init
-    killall neutron-keepalived-state-change 2>/dev/null || :
-    kill $(ps ax | grep -e "keepalived.*\.pid-vrrp" | awk '{print $1}') 2>/dev/null || :
-    kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || :
 else
-    echo "Upgrading openstack-puppet-modules"
+    echo "Upgrading openstack-puppet-modules and its dependencies"
     yum -q -y update openstack-puppet-modules
+    yum deplist openstack-puppet-modules | awk '/dependency/{print $2}' | xargs yum -q -y update
     echo "Upgrading other packages is handled by config management tooling"
     echo -n "true" > $heat_outputs_path.update_managed_packages
     exit 0
@@ -173,6 +92,17 @@ return_code=$?
 echo "$result"
 echo "yum return code: $return_code"
 
+# Writes any changes caused by alterations to os-net-config and bounces the
+# interfaces *before* restarting the cluster.
+os-net-config -c /etc/os-net-config/config.json -v --detailed-exit-codes
+RETVAL=$?
+if [[ $RETVAL == 2 ]]; then
+    echo "os-net-config: interface configuration files updated successfully"
+elif [[ $RETVAL != 0 ]]; then
+    echo "ERROR: os-net-config configuration failed"
+    exit $RETVAL
+fi
+
 if [[ "$pacemaker_status" == "active" ]] ; then
     echo "Starting cluster node"
     pcs cluster start