Initial VIP ipv6 minor update code
author Michele Baldessari <michele@acksyn.org>
Thu, 27 Apr 2017 19:41:11 +0000 (21:41 +0200)
committer Michele Baldessari <michele@acksyn.org>
Tue, 2 May 2017 18:01:21 +0000 (20:01 +0200)
To test this change we deployed a stock master with IPv6, which created a bunch
of IPv6 VIPs with a /64 netmask:
[root@overcloud-controller-0 ~]# pcs resource show ip-fd00.fd00.fd00.2000..18
 Resource: ip-fd00.fd00.fd00.2000..18 (class=ocf provider=heartbeat type=IPaddr2)
  Attributes: ip=fd00:fd00:fd00:2000::18 cidr_netmask=64
  Operations: start interval=0s timeout=20s (ip-fd00.fd00.fd00.2000..18-start-interval-0s)
              stop interval=0s timeout=20s (ip-fd00.fd00.fd00.2000..18-stop-interval-0s)
              monitor interval=10s timeout=20s (ip-fd00.fd00.fd00.2000..18-monitor-interval-10s)

Then we updated the THT folder with this patch and uploaded the new scripts to the undercloud via:
openstack overcloud deploy --update-plan-only ....

Then we kicked off the minor update workflow:
openstack overcloud update stack -i overcloud

Once the update of the controller-0 node (the bootstrap node for pacemaker) has
completed, the VIP has the correct configuration:
[root@overcloud-controller-0 heat-config-script]# pcs resource show ip-fd00.fd00.fd00.2000..18
 Resource: ip-fd00.fd00.fd00.2000..18 (class=ocf provider=heartbeat type=IPaddr2)
  Attributes: ip=fd00:fd00:fd00:2000::18 cidr_netmask=128 nic=vlan20 lvs_ipv6_addrlabel=true lvs_ipv6_addrlabel_value=99
  Operations: start interval=0s timeout=20s (ip-fd00.fd00.fd00.2000..18-start-interval-0s)
              stop interval=0s timeout=20s (ip-fd00.fd00.fd00.2000..18-stop-interval-0s)
              monitor interval=10s timeout=20s (ip-fd00.fd00.fd00.2000..18-monitor-interval-10s)

Also verified that running the script a second time does not alter the
(already fixed) VIPs.

Co-Authored-By: Damien Ciabrini <dciabrin@redhat.com>
Change-Id: I765cd5c9b57134dff61f67ce726bf88af90f8090

extraconfig/tasks/pacemaker_common_functions.sh
extraconfig/tasks/yum_update.sh

index 4480f74..f17a073 100755 (executable)
@@ -322,3 +322,52 @@ function special_case_ovs_upgrade_if_needed {
 
 }
 
+# This code is meant to fix https://bugs.launchpad.net/tripleo/+bug/1686357 on
+# existing setups via a minor update workflow and be idempotent. We need to
+# run this before the yum update because we fix this up even when there are no
+# packages to update on the system (in which case the script exits).
+# This code must be called with set +eu (due to the ocf scripts being sourced)
+function fixup_wrong_ipv6_vip {
+    # This XPath query identifies all the VIPs in pacemaker with netmask /64. Those are IPv6-only resources that have the wrong netmask
+    # This gives the address of the resource in the CIB, one address per line. For example:
+    # /cib/configuration/resources/primitive[@id='ip-2001.db8.ca2.4..10']/instance_attributes[@id='ip-2001.db8.ca2.4..10-instance_attributes']\
+    # /nvpair[@id='ip-2001.db8.ca2.4..10-instance_attributes-cidr_netmask']
+    vip_xpath_query="//resources/primitive[@type='IPaddr2']/instance_attributes/nvpair[@name='cidr_netmask' and @value='64']"
+    vip_xpath_xml_addresses=$(cibadmin --query --xpath "$vip_xpath_query" -e 2>/dev/null)
+    # The following extracts the @id value of the resource
+    vip_resources_to_fix=$(echo -e "$vip_xpath_xml_addresses" | sed -n "s/.*primitive\[@id='\([^']*\)'.*/\1/p")
+    # Running this in a subshell so that sourcing files cannot possibly affect the running script
+    (
+        OCF_PATH="/usr/lib/ocf/lib/heartbeat"
+        if [ -n "$vip_resources_to_fix" -a -f $OCF_PATH/ocf-shellfuncs -a -f $OCF_PATH/findif.sh ]; then
+            source $OCF_PATH/ocf-shellfuncs
+            source $OCF_PATH/findif.sh
+            for resource in $vip_resources_to_fix; do
+                echo "Updating IPv6 VIP $resource with a /128 and a correct addrlabel"
+                # The following will give us something like:
+                # <nvpair id="ip-2001.db8.ca2.4..10-instance_attributes-ip" name="ip" value="2001:db8:ca2:4::10"/>
+                ip_cib_nvpair=$(cibadmin --query --xpath "//resources/primitive[@type='IPaddr2' and @id='$resource']/instance_attributes/nvpair[@name='ip']")
+                # Let's filter out the value of the nvpair to get the ip address
+                ip_address=$(echo $ip_cib_nvpair | xmllint --xpath 'string(//nvpair/@value)' -)
+                OCF_RESKEY_cidr_netmask="64"
+                OCF_RESKEY_ip="$ip_address"
+                # Unfortunately due to https://bugzilla.redhat.com/show_bug.cgi?id=1445628
+                # we need to find out the appropriate nic given the ip address.
+                nic=$(findif $ip_address | awk '{ print $1 }')
+                ret=$?
+                if [ -z "$nic" -o $ret -ne 0 ]; then
+                    echo "NIC autodetection failed for VIP $ip_address, not updating VIPs"
+                    # Only exits the subshell
+                    exit 1
+                fi
+                ocf_run -info pcs resource update --wait "$resource" ip="$ip_address" cidr_netmask=128 nic="$nic" lvs_ipv6_addrlabel=true lvs_ipv6_addrlabel_value=99
+                ret=$?
+                if [ $ret -ne 0 ]; then
+                    echo "pcs resource update for VIP $resource failed, not updating VIPs"
+                    # Only exits the subshell
+                    exit 1
+                fi
+            done
+        fi
+    )
+}
index 018c9b7..83d6d8d 100755 (executable)
@@ -38,6 +38,29 @@ if [[ -a "$timestamp_file" ]]; then
 fi
 touch "$timestamp_file"
 
+pacemaker_status=""
+if hiera -c /etc/puppet/hiera.yaml service_names | grep -q pacemaker; then
+    pacemaker_status=$(systemctl is-active pacemaker)
+fi
+
+# (NB: when backporting this s/pacemaker_short_bootstrap_node_name/bootstrap_nodeid)
+# This runs before the yum_update so we are guaranteed to run it even in the absence
+# of packages to update (the check for -z "$update_identifier" guarantees that this
+# is run only on overcloud stack update -i)
+if [[ "$pacemaker_status" == "active" && \
+        "$(hiera -c /etc/puppet/hiera.yaml pacemaker_short_bootstrap_node_name)" == "$(facter hostname)" ]] ; then \
+    # OCF scripts don't cope with -eu
+    echo "Verifying if we need to fix up any IPv6 VIPs"
+    set +eu
+    fixup_wrong_ipv6_vip
+    ret=$?
+    set -eu
+    if [ $ret -ne 0 ]; then
+        echo "Fixing up IPv6 VIPs failed. Stopping here. (See https://bugs.launchpad.net/tripleo/+bug/1686357 for more info)"
+        exit 1
+    fi
+fi
+
 command_arguments=${command_arguments:-}
 
 # yum check-update exits 100 if updates are available
@@ -55,10 +78,6 @@ elif [[ "$check_update_exit" != "100" ]]; then
     exit 0
 fi
 
-pacemaker_status=""
-if hiera -c /etc/puppet/hiera.yaml service_names | grep -q pacemaker; then
-    pacemaker_status=$(systemctl is-active pacemaker)
-fi
 
 # special case https://bugs.launchpad.net/tripleo/+bug/1635205 +bug/1669714
 special_case_ovs_upgrade_if_needed
@@ -129,6 +148,7 @@ if [[ "$pacemaker_status" == "active" ]] ; then
     pcs status
 fi
 
-echo "Finished yum_update.sh on server $deploy_server_id at `date`"
+
+echo "Finished yum_update.sh on server $deploy_server_id at `date` with return code: $return_code"
 
 exit $return_code