# Reconstructed from the removed ("-") side of the blobdiff for
# src/ceph/qa/tasks/mgr/test_failover.py (blob 0dd9cb7).

import json
import logging

from tasks.mgr.mgr_test_case import MgrTestCase


log = logging.getLogger(__name__)


class TestFailover(MgrTestCase):
    """
    Failover tests for mgr daemons: beacon timeouts with and without a
    standby available, explicit ``mgr fail``, and standby expiry.

    Every test drives the cluster through ``self.mgr_cluster`` and polls
    with the ``wait_until_*`` helpers inherited from the base class.
    """

    # Each test needs one active mgr plus at least one standby.
    # NOTE(review): presumably enforced by MgrTestCase -- confirm there.
    MGRS_REQUIRED = 2

    def _stop_and_fail(self, mgr_id):
        """
        Stop the daemon `mgr_id`, then explicitly mark it failed.

        The stop always comes first, matching the order every caller in
        this class relies on.
        """
        self.mgr_cluster.mgr_stop(mgr_id)
        self.mgr_cluster.mgr_fail(mgr_id)

    def test_timeout(self):
        """
        That when an active mgr stops responding, a standby is promoted
        after mon_mgr_beacon_grace.
        """

        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        # Stop that daemon
        self.mgr_cluster.mgr_stop(original_active)

        # Assert that the other mgr becomes active
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in original_standbys,
            timeout=60
        )

        # Bring the stopped daemon back; it should rejoin as a standby.
        self.mgr_cluster.mgr_restart(original_active)
        self.wait_until_true(
            lambda: original_active in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

    def test_timeout_nostandby(self):
        """
        That when an active mgr stops responding, and no standby is
        available, the active mgr is removed from the map anyway.
        """
        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        # Take every standby out of the picture first.
        for standby in original_standbys:
            self._stop_and_fail(standby)

        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)

        grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace"))
        log.info("Should time out in about {0} seconds".format(grace))

        self.mgr_cluster.mgr_stop(original_active)

        # Now wait for the mon to notice the mgr is gone and remove it
        # from the map.
        self.wait_until_equal(
            lambda: self.mgr_cluster.get_active_id(),
            "",
            timeout=grace * 2
        )

        self.assertListEqual(self.mgr_cluster.get_standby_ids(), [])
        self.assertEqual(self.mgr_cluster.get_active_id(), "")

    def test_explicit_fail(self):
        """
        That when a user explicitly fails a daemon, a standby immediately
        replaces it.
        """
        # Query which mgr is active
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        self.mgr_cluster.mgr_fail(original_active)

        # A standby should take over
        self.wait_until_true(
            lambda: self.mgr_cluster.get_active_id() in original_standbys,
            timeout=60
        )

        # The one we failed should come back as a standby (he isn't
        # really dead)
        self.wait_until_true(
            lambda: original_active in self.mgr_cluster.get_standby_ids(),
            timeout=10
        )

        # Both daemons should have fully populated metadata
        # (regression test for http://tracker.ceph.com/issues/21260)
        meta = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd(
            "mgr", "metadata"))
        id_to_meta = {entry['id']: entry for entry in meta}
        for mgr_id in [original_active] + original_standbys:
            self.assertIn(mgr_id, id_to_meta)
            self.assertIn('ceph_version', id_to_meta[mgr_id])

        # We should be able to fail back over again: this exercises
        # our re-initialization of the python runtime within
        # a single process lifetime.

        # Get rid of any bystander standbys so that the original_active
        # will be selected as next active.
        new_active = self.mgr_cluster.get_active_id()
        for daemon in original_standbys:
            if daemon != new_active:
                self._stop_and_fail(daemon)

        self.assertListEqual(self.mgr_cluster.get_standby_ids(),
                             [original_active])

        self._stop_and_fail(new_active)

        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)
        self.assertEqual(self.mgr_cluster.get_standby_ids(), [])

    def test_standby_timeout(self):
        """
        That when a standby daemon stops sending beacons, it is
        removed from the list of standbys.
        """
        original_active = self.mgr_cluster.get_active_id()
        original_standbys = self.mgr_cluster.get_standby_ids()

        victim = original_standbys[0]
        self.mgr_cluster.mgr_stop(victim)

        expect_standbys = set(original_standbys) - {victim}

        self.wait_until_true(
            lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys,
            timeout=60
        )
        # Losing a standby must not disturb the active daemon.
        self.assertEqual(self.mgr_cluster.get_active_id(), original_active)