X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=extraconfig%2Ftasks%2Fmajor_upgrade_pacemaker_migrations.sh;h=7c9083a4e28187fa6c5543a8044236461b161f97;hb=2df74cc8295bcc96add61d559068153056ba3e5d;hp=b8c5321b451e7c6c442b0bde878f2e3b0619bc5c;hpb=00e926ef5a1a809bd0f6cf19b53b41419a19c2ae;p=apex-tripleo-heat-templates.git diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh index b8c5321b..7c9083a4 100644 --- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh +++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh @@ -77,7 +77,6 @@ function services_to_migrate { openstack-aodh-evaluator-clone openstack-aodh-listener-clone openstack-aodh-notifier-clone - openstack-ceilometer-api-clone openstack-ceilometer-central-clone openstack-ceilometer-collector-clone openstack-ceilometer-notification-clone @@ -109,20 +108,17 @@ function services_to_migrate { # during the conversion # 2. Remove all the colocation constraints and then the ordering constraints, except the # ones related to haproxy/VIPs which exist in Newton as well -# 3. Remove all the resources that won't be managed by pacemaker in newton. Note that they -# will show up as ORPHANED but they will keep running normally via systemd. They will be -# enabled to start at boot by puppet during the converge step -# 4. Take the cluster out of maintenance-mode and do a resource cleanup +# 3. Take the cluster out of maintenance-mode +# 4. Remove all the resources that won't be managed by pacemaker in newton. The +# outcome will be +# that they are stopped and removed from pacemakers control +# 5. Do a resource cleanup to make sure the cluster is in a clean state function migrate_full_to_ng_ha { if [[ -n $(pcmk_running) ]]; then pcs property set maintenance-mode=true - # We are making sure here that the property has propagated everywhere - if ! timeout -k 10 300 crm_resource --wait; then - echo_error "ERROR: cluster remained unstable after setting maintenance-mode for more than 300 seconds, exiting." - exit 1 - fi - # First we go through all the colocation constraints (except the ones we want to keep, i.e. the haproxy/ip ones) - # and we remove those + + # First we go through all the colocation constraints (except the ones + # we want to keep, i.e. the haproxy/ip ones) and we remove those COL_CONSTRAINTS=$(pcs config show | sed -n '/^Colocation Constraints:$/,/^$/p' | grep -v "Colocation Constraints:" | egrep -v "ip-.*haproxy" | awk '{print $NF}' | cut -f2 -d: |cut -f1 -d\)) for constraint in $COL_CONSTRAINTS; do log_debug "Deleting colocation constraint $constraint from CIB" @@ -135,32 +131,35 @@ function migrate_full_to_ng_ha { log_debug "Deleting ordering constraint $constraint from CIB" pcs constraint remove "$constraint" done + # At this stage all the pacemaker resources are removed from the CIB. + # Once we remove the maintenance-mode those systemd resources will keep + # on running. They shall be systemd enabled via the puppet converge + # step later on + pcs property set maintenance-mode=false # At this stage there are no constraints whatsoever except the haproxy/ip ones - # which we want to keep. We now delete each resource that will move to systemd - # Note that the corresponding systemd resource will stay running, which means that - # later when we do the "yum update", things will be a bit slower because each - # "systemctl try-restart " is not a no-op any longer because the service is up - # and running and it will be restarted with rabbitmq being down. + # which we want to keep. We now disable and then delete each resource + # that will move to systemd. + # We want the systemd resources be stopped before doing "yum update", + # that way "systemctl try-restart " is no-op because the + # service was down already PCS_STATUS_OUTPUT="$(pcs status)" for resource in $(services_to_migrate) "delay-clone" "openstack-core-clone"; do if echo "$PCS_STATUS_OUTPUT" | grep "$resource"; then log_debug "Deleting $resource from the CIB" - - # We need to add --force because the cluster is in maintenance mode and the resource - # is unmanaged. The if serves to make this idempotent + if ! pcs resource disable "$resource" --wait=600; then + echo_error "ERROR: resource $resource failed to be disabled" + exit 1 + fi pcs resource delete --force "$resource" else - log_debug "Service $service not found as a pacemaker resource, not trying to delete." + log_debug "Service $resource not found as a pacemaker resource, not trying to delete." fi done - # At this stage all the pacemaker resources are removed from the CIB. Once we remove the - # maintenance-mode those systemd resources will keep on running. They shall be systemd enabled - # via the puppet converge step later on - pcs property set maintenance-mode=false - # We need to do a pcs resource cleanup here + crm_resource --wait to make sure the - # cluster is in a clean state before we stop everything, upgrade and restart everything + # We need to do a pcs resource cleanup here + crm_resource --wait to + # make sure the cluster is in a clean state before we stop everything, + # upgrade and restart everything pcs resource cleanup # We are making sure here that the cluster is stable before proceeding if ! timeout -k 10 600 crm_resource --wait; then @@ -169,3 +168,14 @@ function migrate_full_to_ng_ha { fi fi } + +function disable_standalone_ceilometer_api { + if [[ -n $(is_bootstrap_node) ]]; then + if [[ -n $(is_pacemaker_managed openstack-ceilometer-api) ]]; then + # Disable pacemaker resources for ceilometer-api + manage_pacemaker_service disable openstack-ceilometer-api + check_resource_pacemaker openstack-ceilometer-api stopped 600 + pcs resource delete openstack-ceilometer-api --wait=600 + fi + fi +}