src/ceph/qa/tasks/osd_max_pg_per_osd.py

   1 import logging
   2 import random
   3
   4
# Module-level logger, named after this module; used for the step-by-step
# progress messages emitted by the tests below.
log = logging.getLogger(__name__)
   6
   7
   8 def pg_num_in_all_states(pgs, *states):
   9     return sum(1 for state in pgs.itervalues()
  10                if all(s in state for s in states))
  11
  12
  13 def pg_num_in_any_state(pgs, *states):
  14     return sum(1 for state in pgs.itervalues()
  15                if any(s in state for s in states))
  16
  17
  18 def test_create_from_mon(ctx, config):
  19     """
  20     osd should stop creating new pools if the number of pg it servers
  21     exceeds the max-pg-per-osd setting, and it should resume the previously
  22     suspended pg creations once the its pg number drops down below the setting
  23     How it works::
  24     1. set the hard limit of pg-per-osd to "2"
  25     2. create pool.a with pg_num=2
  26        # all pgs should be active+clean
  27     2. create pool.b with pg_num=2
  28        # new pgs belonging to this pool should be unknown (the primary osd
  29        reaches the limit) or creating (replica osd reaches the limit)
  30     3. remove pool.a
  31     4. all pg belonging to pool.b should be active+clean
  32     """
  33     pg_num = config.get('pg_num', 2)
  34     manager = ctx.managers['ceph']
  35     log.info('1. creating pool.a')
  36     pool_a = manager.create_pool_with_unique_name(pg_num)
  37     manager.wait_for_clean()
  38     assert manager.get_num_active_clean() == pg_num
  39
  40     log.info('2. creating pool.b')
  41     pool_b = manager.create_pool_with_unique_name(pg_num)
  42     pg_states = manager.wait_till_pg_convergence(300)
  43     pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
  44     assert pg_created == pg_num
  45     pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating')
  46     assert pg_pending == pg_num
  47
  48     log.info('3. removing pool.a')
  49     manager.remove_pool(pool_a)
  50     pg_states = manager.wait_till_pg_convergence(300)
  51     assert len(pg_states) == pg_num
  52     pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
  53     assert pg_created == pg_num
  54
  55     # cleanup
  56     manager.remove_pool(pool_b)
  57
  58
  59 def test_create_from_peer(ctx, config):
  60     """
  61     osd should stop creating new pools if the number of pg it servers
  62     exceeds the max-pg-per-osd setting, and it should resume the previously
  63     suspended pg creations once the its pg number drops down below the setting
  64
  65     How it works::
  66     0. create 4 OSDs.
  67     1. create pool.a with pg_num=1, size=2
  68        pg will be mapped to osd.0, and osd.1, and it should be active+clean
  69     2. create pool.b with pg_num=1, size=2.
  70        if the pgs stuck in creating, delete the pool since the pool and try
  71        again, eventually we'll get the pool to land on the other 2 osds that
  72        aren't occupied by pool.a. (this will also verify that pgs for deleted
  73        pools get cleaned out of the creating wait list.)
  74     3. mark an osd out. verify that some pgs get stuck stale or peering.
  75     4. delete a pool, verify pgs go active.
  76     """
  77     pg_num = config.get('pg_num', 1)
  78     pool_size = config.get('pool_size', 2)
  79     from_primary = config.get('from_primary', True)
  80
  81     manager = ctx.managers['ceph']
  82     log.info('1. creating pool.a')
  83     pool_a = manager.create_pool_with_unique_name(pg_num)
  84     manager.wait_for_clean()
  85     assert manager.get_num_active_clean() == pg_num
  86
  87     log.info('2. creating pool.b')
  88     while True:
  89         pool_b = manager.create_pool_with_unique_name(pg_num)
  90         pg_states = manager.wait_till_pg_convergence(300)
  91         pg_created = pg_num_in_all_states(pg_states, 'active', 'clean')
  92         assert pg_created >= pg_num
  93         pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating')
  94         assert pg_pending == pg_num * 2 - pg_created
  95         if pg_created == pg_num * 2:
  96             break
  97         manager.remove_pool(pool_b)
  98
  99     log.info('3. mark an osd out')
 100     pg_stats = manager.get_pg_stats()
 101     pg = random.choice(pg_stats)
 102     if from_primary:
 103         victim = pg['acting'][-1]
 104     else:
 105         victim = pg['acting'][0]
 106     manager.mark_out_osd(victim)
 107     pg_states = manager.wait_till_pg_convergence(300)
 108     pg_stuck = pg_num_in_any_state(pg_states, 'activating', 'stale', 'peering')
 109     assert pg_stuck > 0
 110
 111     log.info('4. removing pool.b')
 112     manager.remove_pool(pool_b)
 113     manager.wait_for_clean(30)
 114
 115     # cleanup
 116     manager.remove_pool(pool_a)
 117
 118
 119 def task(ctx, config):
 120     assert isinstance(config, dict), \
 121         'osd_max_pg_per_osd task only accepts a dict for config'
 122     manager = ctx.managers['ceph']
 123     if config.get('test_create_from_mon', True):
 124         test_create_from_mon(ctx, config)
 125     else:
 126         test_create_from_peer(ctx, config)