stor4nfv.git / src/ceph/qa/tasks/rados.py
"""
Rados model-based integration tests
"""
import contextlib
import logging
import gevent
from teuthology import misc as teuthology

from teuthology.orchestra import run

log = logging.getLogger(__name__)

@contextlib.contextmanager
def task(ctx, config):
    """
    Run RadosModel-based integration tests.

    The config should be as follows::

        rados:
          clients: [client list]
          ops: <number of ops>
          objects: <number of objects to use>
          max_in_flight: <max number of operations in flight>
          object_size: <size of objects in bytes>
          min_stride_size: <minimum write stride size in bytes>
          max_stride_size: <maximum write stride size in bytes>
          op_weights: <dictionary mapping operation type to integer weight>
          runs: <number of times to run> - the pool is remade between runs
          ec_pool: use an ec pool
          erasure_code_profile: profile to use with the erasure coded pool
          fast_read: enable ec_pool's fast_read
          min_size: set the min_size of created pool
          pool_snaps: use pool snapshots instead of selfmanaged snapshots
          write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED.
                                  This hints that the data will not be accessed in
                                  the near future, so the OSD backend need not keep
                                  it in cache.

    For example::

        tasks:
        - ceph:
        - rados:
            clients: [client.0]
            ops: 1000
            max_seconds: 0   # 0 for no limit
            objects: 25
            max_in_flight: 16
            object_size: 4000000
            min_stride_size: 1024
            max_stride_size: 4096
            op_weights:
              read: 20
              write: 10
              delete: 2
              snap_create: 3
              rollback: 2
              snap_remove: 0
            ec_pool: create an ec pool, defaults to False
            erasure_code_use_overwrites: test overwrites, default false
            erasure_code_profile:
              name: teuthologyprofile
              k: 2
              m: 1
              crush-failure-domain: osd
            pool_snaps: true
            write_fadvise_dontneed: true
            runs: 10
        - interactive:

    Optionally, you can provide the pool name to run against:

        tasks:
        - ceph:
        - exec:
            client.0:
              - ceph osd pool create foo
        - rados:
            clients: [client.0]
            pools: [foo]
            ...

    Alternatively, you can provide a pool prefix:

        tasks:
        - ceph:
        - exec:
            client.0:
              - ceph osd pool create foo.client.0
        - rados:
            clients: [client.0]
            pool_prefix: foo
            ...

    The tests are run asynchronously; they are not complete when the task
    returns. For instance:

        - rados:
            clients: [client.0]
            pools: [ecbase]
            ops: 4000
            objects: 500
            op_weights:
              read: 100
              write: 100
              delete: 50
              copy_from: 50
        - print: "**** done rados ec-cache-agent (part 2)"

    will run the print task immediately after the rados task begins but
    not after it completes. To make the rados task a blocking / sequential
    task, use:

        - sequential:
          - rados:
              clients: [client.0]
              pools: [ecbase]
              ops: 4000
              objects: 500
              op_weights:
                read: 100
                write: 100
                delete: 50
                copy_from: 50
        - print: "**** done rados ec-cache-agent (part 2)"

    """
    log.info('Beginning rados...')
    assert isinstance(config, dict), \
        "please list clients to run on"

    object_size = int(config.get('object_size', 4000000))
    op_weights = config.get('op_weights', {})
    testdir = teuthology.get_testdir(ctx)
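    # Build the ceph_test_rados invocation, wrapped in the adjust-ulimits and
    # ceph-coverage helpers; coverage data is written under the test archive
    # directory.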
    args = [
        'adjust-ulimits',
        'ceph-coverage',
        '{tdir}/archive/coverage'.format(tdir=testdir),
        'ceph_test_rados']
    if config.get('ec_pool', False):
        args.extend(['--no-omap'])
        if not config.get('erasure_code_use_overwrites', False):
            args.extend(['--ec-pool'])
    if config.get('write_fadvise_dontneed', False):
        args.extend(['--write-fadvise-dontneed'])
    if config.get('set_redirect', False):
        args.extend(['--set_redirect'])
    if config.get('pool_snaps', False):
        args.extend(['--pool-snaps'])
    args.extend([
        '--max-ops', str(config.get('ops', 10000)),
        '--objects', str(config.get('objects', 500)),
        '--max-in-flight', str(config.get('max_in_flight', 16)),
        '--size', str(object_size),
        '--min-stride-size', str(config.get('min_stride_size', object_size // 10)),
        '--max-stride-size', str(config.get('max_stride_size', object_size // 5)),
        '--max-seconds', str(config.get('max_seconds', 0))
        ])
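    # With the defaults above, the command line so far looks roughly like:
    #   adjust-ulimits ceph-coverage <testdir>/archive/coverage ceph_test_rados \
    #     --max-ops 10000 --objects 500 --max-in-flight 16 --size 4000000 \
    #     --min-stride-size 400000 --max-stride-size 800000 --max-seconds 0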

    weights = {}
    weights['read'] = 100
    weights['write'] = 100
    weights['delete'] = 10
    # Parallel of the op_types in test/osd/TestRados.cc
    for field in [
        # read handled above
        # write handled above
        # delete handled above
        "snap_create",
        "snap_remove",
        "rollback",
        "setattr",
        "rmattr",
        "watch",
        "copy_from",
        "hit_set_list",
        "is_dirty",
        "undirty",
        "cache_flush",
        "cache_try_flush",
        "cache_evict",
        "append",
        "write",
        "read",
        "delete"
        ]:
        if field in op_weights:
            weights[field] = op_weights[field]

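    # When write_append_excl is enabled (the default), give half of the
    # configured write/append weight to the exclusive-create variants so the
    # combined write/append weight stays the same.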
    if config.get('write_append_excl', True):
        if 'write' in weights:
            weights['write'] = weights['write'] // 2
            weights['write_excl'] = weights['write']

        if 'append' in weights:
            weights['append'] = weights['append'] // 2
            weights['append_excl'] = weights['append']

    for op, weight in weights.iteritems():
        args.extend([
            '--op', op, str(weight)
        ])
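    # e.g. with only the default weights this appends:
    #   --op read 100 --op write 50 --op write_excl 50 --op delete 10
    # (dict iteration order determines the actual argument order)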
    def thread():
        """Thread spawned by gevent"""
        clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')]
        log.info('clients are %s' % clients)
        manager = ctx.managers['ceph']
        if config.get('ec_pool', False):
            profile = config.get('erasure_code_profile', {})
            profile_name = profile.get('name', 'teuthologyprofile')
            manager.create_erasure_code_profile(profile_name, profile)
        else:
            profile_name = None
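        # Each run drives all configured clients in parallel, each against its
        # own pool; pools created below are removed again at the end of the run.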
        for i in range(int(config.get('runs', '1'))):
            log.info("starting run %s out of %s", str(i), config.get('runs', '1'))
            tests = {}
            existing_pools = config.get('pools', [])
            created_pools = []
            for role in config.get('clients', clients):
                assert isinstance(role, basestring)
                PREFIX = 'client.'
                assert role.startswith(PREFIX)
                id_ = role[len(PREFIX):]

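                # Pick a pool for this client: pop one of the preconfigured
                # 'pools' if available, otherwise create a uniquely named
                # (possibly erasure-coded) pool that is deleted after the run.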
                pool = config.get('pool', None)
                if not pool and existing_pools:
                    pool = existing_pools.pop()
                else:
                    pool = manager.create_pool_with_unique_name(
                        erasure_code_profile_name=profile_name,
                        erasure_code_use_overwrites=
                          config.get('erasure_code_use_overwrites', False)
                    )
                    created_pools.append(pool)
                    if config.get('fast_read', False):
                        manager.raw_cluster_cmd(
                            'osd', 'pool', 'set', pool, 'fast_read', 'true')
                    min_size = config.get('min_size', None)
                    if min_size is not None:
                        manager.raw_cluster_cmd(
                            'osd', 'pool', 'set', pool, 'min_size', str(min_size))

                (remote,) = ctx.cluster.only(role).remotes.iterkeys()
                proc = remote.run(
                    args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args +
                    ["--pool", pool],
                    logger=log.getChild("rados.{id}".format(id=id_)),
                    stdin=run.PIPE,
                    wait=False
                    )
                tests[id_] = proc
            # wait for every client's ceph_test_rados run to finish
            run.wait(tests.itervalues())

            for pool in created_pools:
                manager.wait_snap_trimming_complete(pool)
                manager.remove_pool(pool)

    running = gevent.spawn(thread)

    try:
        yield
    finally:
        log.info('joining rados')
        running.get()