src/ceph/qa/tasks/thrashosds.py

   1 """
   2 Thrash -- Simulate random osd failures.
   3 """
   4 import contextlib
   5 import logging
   6 import ceph_manager
   7 from teuthology import misc as teuthology
   8
   9
  10 log = logging.getLogger(__name__)  # module logger; task() derives the 'thrasher' child logger from it
  11
  12 @contextlib.contextmanager
  13 def task(ctx, config):
  14     """
  15     "Thrash" the OSDs by randomly marking them out/down (and then back
  16     in) until the task is ended. This loops, and every op_delay
  17     seconds it randomly chooses to add or remove an OSD (even odds)
  18     unless there are fewer than min_out OSDs out of the cluster, or
  19     more than min_in OSDs in the cluster.
  20
  21     All commands are run on mon0 and it stops when __exit__ is called.
  22
  23     The config is optional, and is a dict containing some or all of:
  24
  25     cluster: (default 'ceph') the name of the cluster to thrash
  26
  27     min_in: (default 4) the minimum number of OSDs to keep in the
  28        cluster
  29
  30     min_out: (default 0) the minimum number of OSDs to keep out of the
  31        cluster
  32
  33     op_delay: (5) the length of time to sleep between changing an
  34        OSD's status
  35
  36     min_dead: (0) minimum number of osds to leave down/dead.
  37
  38     max_dead: (0) maximum number of osds to leave down/dead before waiting
  39        for clean.  This should probably be num_replicas - 1.
  40
  41     clean_interval: (60) the approximate length of time to loop before
  42        waiting until the cluster goes clean. (In reality this is used
  43        to probabilistically choose when to wait, and the method used
  44        makes it closer to -- but not identical to -- the half-life.)
  45
  46     scrub_interval: (-1) the approximate length of time to loop before
  47        waiting until a scrub is performed while cleaning. (In reality
  48        this is used to probabilistically choose when to wait, and it
  49        only applies to the cases where cleaning is being performed).
  50        -1 is used to indicate that no scrubbing will be done.
  51
  52     chance_down: (0.4) the probability that the thrasher will mark an
  53        OSD down rather than marking it out. (The thrasher will not
  54        consider that OSD out of the cluster, since presently an OSD
  55        wrongly marked down will mark itself back up again.) This value
  56        can be either an integer (eg, 75) or a float probability (eg
  57        0.75).
  58
  59     chance_test_min_size: (0) chance to run test_pool_min_size,
  60        which:
  61        - kills all but one osd
  62        - waits
  63        - kills that osd
  64        - revives all other osds
  65        - verifies that the osds fully recover
  66
  67     timeout: (360) the number of seconds to wait for the cluster
  68        to become clean after each cluster change. If this doesn't
  69        happen within the timeout, an exception will be raised.
  70
  71     revive_timeout: (150) number of seconds to wait for an osd asok to
  72        appear after attempting to revive the osd
  73
  74     thrash_primary_affinity: (true) randomly adjust primary-affinity
  75
  76     chance_pgnum_grow: (0) chance to increase a pool's size
  77     chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool
  78     pool_grow_by: (10) amount to increase pgnum by
  79     max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
  80
  81     pause_short: (3) duration of short pause
  82     pause_long: (80) duration of long pause
  83     pause_check_after: (50) assert osd down after this long
  84     chance_inject_pause_short: (1) chance of injecting short stall
  85     chance_inject_pause_long: (0) chance of injecting long stall
  86
  87     clean_wait: (0) duration to wait before resuming thrashing once clean
  88
  89     sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
  90                   random live osd
  91
  92     powercycle: (false) whether to power cycle the node instead
  93         of just the osd process. Note that this assumes that a single
  94         osd is the only important process on the node.
  95
  96     bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
  97         the delay lets the BlockDevice "accept" more aio operations but blocks
  98         any flush, and then eventually crashes (losing some or all ios).  If 0,
  99         no bdev failure injection is enabled.
 100
 101     bdev_inject_crash_probability: (.5) probability of doing a bdev failure
 102         injection crash vs a normal OSD kill.
 103
 104     chance_test_backfill_full: (0) chance to simulate full disks stopping
 105         backfill
 106
 107     chance_test_map_discontinuity: (0) chance to test map discontinuity
 108     map_discontinuity_sleep_time: (40) time to wait for map trims
 109
 110     ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
 111     chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
 112
 113     optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
 114                   enablement to all osds
 115
 116     dump_ops_enable: (true) continuously dump ops on all live osds
 117
 118     noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub
 119
 120     disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
 121                                     tests
 122
 123     chance_thrash_cluster_full: .05
 124
 125     chance_thrash_pg_upmap: 1.0
 126     chance_thrash_pg_upmap_items: 1.0
 127
 128     example:
 129
 130     tasks:
 131     - ceph:
 132     - thrashosds:
 133         cluster: ceph
 134         chance_down: 10
 135         op_delay: 3
 136         min_in: 1
 137         timeout: 600
 138     - interactive:
 139     """
 140     if config is None:
 141         config = {}
 142     assert isinstance(config, dict), \
 143         'thrashosds task only accepts a dict for configuration'
 144     # add default value for sighup_delay
 145     config['sighup_delay'] = config.get('sighup_delay', 0.1)
 146     # add default value for optrack_toggle_delay
 147     config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
 148     # add default value for dump_ops_enable
 149     config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
 150     # add default value for noscrub_toggle_delay
 151     config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
 152     # add default value for random_eio
 153     config['random_eio'] = config.get('random_eio', 0.0)
 154
 155     log.info("config is {config}".format(config=str(config)))
 156
 157     overrides = ctx.config.get('overrides', {})
 158     log.info("overrides is {overrides}".format(overrides=str(overrides)))
 159     teuthology.deep_merge(config, overrides.get('thrashosds', {}))
 160     cluster = config.get('cluster', 'ceph')
 161
 162     log.info("config is {config}".format(config=str(config)))
 163
 164     if 'powercycle' in config:
 165
 166         # sync everyone first to avoid collateral damage to / etc.
 167         log.info('Doing preliminary sync to avoid collateral damage...')
 168         ctx.cluster.run(args=['sync'])
 169
 170         if 'ipmi_user' in ctx.teuthology_config:
 171             for remote in ctx.cluster.remotes.keys():
 172                 log.debug('checking console status of %s' % remote.shortname)
 173                 if not remote.console.check_status():
 174                     log.warn('Failed to get console status for %s',
 175                              remote.shortname)
 176
 177             # check that all osd remotes have a valid console
 178             osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
 179             for remote in osds.remotes.keys():
 180                 if not remote.console.has_ipmi_credentials:
 181                     raise Exception(
 182                         'IPMI console required for powercycling, '
 183                         'but not available on osd role: {r}'.format(
 184                             r=remote.name))
 185
 186     cluster_manager = ctx.managers[cluster]
 187     for f in ['powercycle', 'bdev_inject_crash']:
 188         if config.get(f):
 189             cluster_manager.config[f] = config.get(f)
 190
 191     log.info('Beginning thrashosds...')
 192     thrash_proc = ceph_manager.Thrasher(
 193         cluster_manager,
 194         config,
 195         logger=log.getChild('thrasher')
 196         )
 197     try:
 198         yield
 199     finally:
 200         log.info('joining thrashosds')
 201         thrash_proc.do_join()
 202         cluster_manager.wait_for_all_osds_up()
 203         cluster_manager.flush_all_pg_stats()
 204         cluster_manager.wait_for_recovery(config.get('timeout', 360))