X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=extraconfig%2Ftasks%2Fmajor_upgrade_controller_pacemaker_3.sh;h=6748f891eb3bdb5c5717ae60a5833bdde8eeb5b4;hb=6998edc64a16e1dd859008caba719baeebf43d37;hp=b653c7c7503340170997b7fd9e794233debe5c23;hpb=a1b7af553de02e79f47af4110955e5fc73c385f3;p=apex-tripleo-heat-templates.git

diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_3.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_3.sh
index b653c7c7..6748f891 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_3.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_3.sh
@@ -2,25 +2,67 @@
 
 set -eu
 
-start_or_enable_service rabbitmq
-check_resource rabbitmq started 600
+cluster_form_timeout=600
+cluster_settle_timeout=1800
+galera_sync_timeout=600
+
+if [[ -n $(is_bootstrap_node) ]]; then
+    pcs cluster start --all
+
+    tstart=$(date +%s)
+    while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > cluster_form_timeout )) ; then
+            echo_error "ERROR: timed out forming the cluster"
+            exit 1
+        fi
+    done
+
+    if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
+        echo_error "ERROR: timed out waiting for cluster to finish transition"
+        exit 1
+    fi
+
+    for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
+        pcs resource enable $vip
+        check_resource_pacemaker $vip started 60
+    done
+fi
+
+start_or_enable_service galera
+check_resource galera started 600
 start_or_enable_service redis
 check_resource redis started 600
-start_or_enable_service openstack-cinder-volume
-check_resource openstack-cinder-volume started 600
-
+# We need mongod which is now a systemd service up and running before calling
+# ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
+# so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
+# we should be good.
+# Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 am using systemctl directly atm
+systemctl start mongod
+check_resource mongod started 600
 
-# Swift isn't controled by pacemaker
-systemctl_swift start
+if [[ -n $(is_bootstrap_node) ]]; then
+    tstart=$(date +%s)
+    while ! clustercheck; do
+        sleep 5
+        tnow=$(date +%s)
+        if (( tnow-tstart > galera_sync_timeout )) ; then
+            echo_error "ERROR galera sync timed out"
+            exit 1
+        fi
+    done
 
-# We need to start the systemd services we explicitely stopped at step _1.sh
-# FIXME: Should we let puppet during the convergence step do the service enabling or
-# should we add it here?
-services=$(services_to_migrate)
-if [[ ${keep_sahara_services_on_upgrade} =~ [Ff]alse ]] ; then
-    services=${services%%openstack-sahara*}
+    # Run all the db syncs
+    # TODO: check if this can be triggered in puppet and removed from here
+    ceilometer-upgrade --config-file=/etc/ceilometer/ceilometer.conf --skip-gnocchi-resource-types
+    cinder-manage db sync
+    glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
+    heat-manage --config-file /etc/heat/heat.conf db_sync
+    keystone-manage db_sync
+    neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
+    nova-manage db sync
+    nova-manage api_db sync
+    nova-manage db online_data_migrations
+    sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
 fi
-for service in $services; do
-    manage_systemd_service start "${service%%-clone}"
-    check_resource_systemd "${service%%-clone}" started 600
-done