extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh

   1 #!/bin/bash
   2
   3 set -eu
   4
   5 cluster_form_timeout=600
   6 cluster_settle_timeout=1800
   7 galera_sync_timeout=600
   8
   9 if [[ -n $(is_bootstrap_node) ]]; then
  10     pcs cluster start --all
  11
  12     tstart=$(date +%s)
  13     while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do
  14         sleep 5
  15         tnow=$(date +%s)
  16         if (( tnow-tstart > cluster_form_timeout )) ; then
  17             echo_error "ERROR: timed out forming the cluster"
  18             exit 1
  19         fi
  20     done
  21
  22     if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then
  23         echo_error "ERROR: timed out waiting for cluster to finish transition"
  24         exit 1
  25     fi
  26
  27     for vip in $(pcs resource show | grep ocf::heartbeat:IPaddr2 | grep Stopped | awk '{ print $1 }'); do
  28       pcs resource enable $vip
  29       check_resource_pacemaker $vip started 60
  30     done
  31 fi
  32
  33 start_or_enable_service galera
  34 check_resource galera started 600
  35 # We need mongod which is now a systemd service up and running before calling
  36 # ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
  37 # so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
  38 # we should be good.
  39 # Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 am using systemctl directly atm
  40 systemctl start mongod
  41 check_resource mongod started 600
  42
  43 if [[ -n $(is_bootstrap_node) ]]; then
  44     tstart=$(date +%s)
  45     while ! clustercheck; do
  46         sleep 5
  47         tnow=$(date +%s)
  48         if (( tnow-tstart > galera_sync_timeout )) ; then
  49             echo_error "ERROR galera sync timed out"
  50             exit 1
  51         fi
  52     done
  53
  54     # Run all the db syncs
  55     # TODO: check if this can be triggered in puppet and removed from here
  56     ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf
  57     cinder-manage db sync
  58     glance-manage --config-file=/etc/glance/glance-registry.conf db_sync
  59     heat-manage --config-file /etc/heat/heat.conf db_sync
  60     keystone-manage db_sync
  61     neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
  62     nova-manage db sync
  63     #TODO(marios):someone from sahara needs to check this:
  64     # sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
  65 fi
  66
  67 start_or_enable_service rabbitmq
  68 check_resource rabbitmq started 600
  69 start_or_enable_service redis
  70 check_resource redis started 600
  71
  72 # Swift isn't controled by pacemaker
  73 systemctl_swift start
  74
  75 # We need to start the systemd services we explicitely stopped at step _1.sh
  76 # FIXME: Should we let puppet during the convergence step do the service enabling or
  77 # should we add it here?
  78 for $service in $(services_to_migrate); do
  79     manage_systemd_service stop "${service%%-clone}"
  80     check_resource_systemd "${service%%-clone}" started 600
  81 done