stor4nfv.git / src/ceph/qa/tasks/rados.py
"""
Rados model-based integration tests
"""
import contextlib
import logging
import gevent
from teuthology import misc as teuthology

from teuthology.orchestra import run

log = logging.getLogger(__name__)

@contextlib.contextmanager
def task(ctx, config):
    """
    Run RadosModel-based integration tests.

    The config should be as follows::

        rados:
          clients: [client list]
          ops: <number of ops>
          objects: <number of objects to use>
          max_in_flight: <max number of operations in flight>
          object_size: <size of objects in bytes>
          min_stride_size: <minimum write stride size in bytes>
          max_stride_size: <maximum write stride size in bytes>
          op_weights: <dictionary mapping operation type to integer weight>
          runs: <number of times to run> - the pool is remade between runs
          ec_pool: use an ec pool
          erasure_code_profile: profile to use with the erasure coded pool
          fast_read: enable ec_pool's fast_read
          min_size: set the min_size of created pool
          pool_snaps: use pool snapshots instead of selfmanaged snapshots
          write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED.
                                  This hints that the data will not be accessed in
                                  the near future, so the OSD backend need not keep
                                  it in cache.

    For example::

        tasks:
        - ceph:
        - rados:
            clients: [client.0]
            ops: 1000
            max_seconds: 0   # 0 for no limit
            objects: 25
            max_in_flight: 16
            object_size: 4000000
            min_stride_size: 1024
            max_stride_size: 4096
            op_weights:
              read: 20
              write: 10
              delete: 2
              snap_create: 3
              rollback: 2
              snap_remove: 0
            ec_pool: create an ec pool, defaults to False
            erasure_code_use_overwrites: test overwrites, default false
            erasure_code_profile:
              name: teuthologyprofile
              k: 2
              m: 1
              crush-failure-domain: osd
            pool_snaps: true
            write_fadvise_dontneed: true
            runs: 10
        - interactive:

    Optionally, you can provide the pool name to run against:

        tasks:
        - ceph:
        - exec:
            client.0:
              - ceph osd pool create foo
        - rados:
            clients: [client.0]
            pools: [foo]
            ...

    Alternatively, you can provide a pool prefix:

        tasks:
        - ceph:
        - exec:
            client.0:
              - ceph osd pool create foo.client.0
        - rados:
            clients: [client.0]
            pool_prefix: foo
            ...

    The tests are run asynchronously; they are not complete when the task
    returns. For instance:

        - rados:
            clients: [client.0]
            pools: [ecbase]
            ops: 4000
            objects: 500
            op_weights:
              read: 100
              write: 100
              delete: 50
              copy_from: 50
        - print: "**** done rados ec-cache-agent (part 2)"

    will run the print task immediately after the rados task begins but
    not after it completes. To make the rados task a blocking / sequential
    task, use:

        - sequential:
          - rados:
              clients: [client.0]
              pools: [ecbase]
              ops: 4000
              objects: 500
              op_weights:
                read: 100
                write: 100
                delete: 50
                copy_from: 50
        - print: "**** done rados ec-cache-agent (part 2)"

    """
    log.info('Beginning rados...')
    assert isinstance(config, dict), \
        "please list clients to run on"

    object_size = int(config.get('object_size', 4000000))
    op_weights = config.get('op_weights', {})
    testdir = teuthology.get_testdir(ctx)
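    # Build the ceph_test_rados invocation, wrapped in the adjust-ulimits and
    # ceph-coverage helpers; coverage data is written under the test archive
    # directory.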
    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'ceph_test_rados']
    if config.get('ec_pool', False):
        args.extend(['--no-omap'])
        if not config.get('erasure_code_use_overwrites', False):
            args.extend(['--ec-pool'])
    if config.get('write_fadvise_dontneed', False):
        args.extend(['--write-fadvise-dontneed'])
    if config.get('set_redirect', False):
        args.extend(['--set_redirect'])
    if config.get('pool_snaps', False):
        args.extend(['--pool-snaps'])
    args.extend([
        '--max-ops', str(config.get('ops', 10000)),
        '--objects', str(config.get('objects', 500)),
        '--max-in-flight', str(config.get('max_in_flight', 16)),
        '--size', str(object_size),
        '--min-stride-size', str(config.get('min_stride_size', object_size // 10)),
        '--max-stride-size', str(config.get('max_stride_size', object_size // 5)),
        '--max-seconds', str(config.get('max_seconds', 0))
        ])
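    # With the defaults above, the command line so far looks roughly like:
    #   adjust-ulimits ceph-coverage <testdir>/archive/coverage ceph_test_rados \
    #     --max-ops 10000 --objects 500 --max-in-flight 16 --size 4000000 \
    #     --min-stride-size 400000 --max-stride-size 800000 --max-seconds 0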

    weights = {}
    weights['read'] = 100
    weights['write'] = 100
    weights['delete'] = 10
    # Parallel of the op_types in test/osd/TestRados.cc
    for field in [
        # read handled above
        # write handled above
        # delete handled above
        "snap_create",
        "snap_remove",
        "rollback",
        "setattr",
        "rmattr",
        "watch",
        "copy_from",
        "hit_set_list",
        "is_dirty",
        "undirty",
        "cache_flush",
        "cache_try_flush",
        "cache_evict",
        "append",
        "write",
        "read",
        "delete"
        ]:
        if field in op_weights:
            weights[field] = op_weights[field]

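    # When write_append_excl is enabled (the default), give half of the
    # configured write/append weight to the exclusive-create variants so the
    # combined write/append weight stays the same.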
    if config.get('write_append_excl', True):
        if 'write' in weights:
            weights['write'] = weights['write'] // 2
            weights['write_excl'] = weights['write']

        if 'append' in weights:
            weights['append'] = weights['append'] // 2
            weights['append_excl'] = weights['append']

    for op, weight in weights.iteritems():
        args.extend([
            '--op', op, str(weight)
        ])
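    # e.g. with only the default weights this appends:
    #   --op read 100 --op write 50 --op write_excl 50 --op delete 10
    # (dict iteration order determines the actual argument order)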
    def thread():
        """Thread spawned by gevent"""
        clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
        log.info('clients are %s' % clients)
        manager = ctx.managers['ceph']
        if config.get('ec_pool', False):
            profile = config.get('erasure_code_profile', {})
            profile_name = profile.get('name', 'teuthologyprofile')
            manager.create_erasure_code_profile(profile_name, profile)
        else:
            profile_name = None
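        # Each run drives all configured clients in parallel, each against its
        # own pool; pools created below are removed again at the end of the run.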
        for i in range(int(config.get('runs', '1'))):
            log.info("starting run %s out of %s", str(i), config.get('runs', '1'))
            tests = {}
            existing_pools = config.get('pools', [])
            created_pools = []
            for role in config.get('clients', clients):
                assert isinstance(role, basestring)
                PREFIX = 'client.'
                assert role.startswith(PREFIX)
                id_ = role[len(PREFIX):]

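                # Pick a pool for this client: pop one of the preconfigured
                # 'pools' if available, otherwise create a uniquely named
                # (possibly erasure-coded) pool that is deleted after the run.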
                pool = config.get('pool', None)
                if not pool and existing_pools:
                    pool = existing_pools.pop()
                else:
                    pool = manager.create_pool_with_unique_name(
                        erasure_code_profile_name=profile_name,
                        erasure_code_use_overwrites=
                          config.get('erasure_code_use_overwrites', False)
                    )
                    created_pools.append(pool)
                    if config.get('fast_read', False):
                        manager.raw_cluster_cmd(
                            'osd', 'pool', 'set', pool, 'fast_read', 'true')
                    min_size = config.get('min_size', None)
                    if min_size is not None:
                        manager.raw_cluster_cmd(
                            'osd', 'pool', 'set', pool, 'min_size', str(min_size))

                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
                proc = remote.run(
                    args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args +
                    ["--pool", pool],
                    logger=log.getChild("rados.{id}".format(id=id_)),
                    stdin=run.PIPE,
                    wait=False
                    )
                tests[id_] = proc
            # wait for every client's ceph_test_rados run to finish
            run.wait(tests.itervalues())

            for pool in created_pools:
                manager.wait_snap_trimming_complete(pool)
                manager.remove_pool(pool)

    running = gevent.spawn(thread)

    try:
        yield
    finally:
        log.info('joining rados')
        running.get()