src/ceph/qa/tasks/ceph_deploy.py

   1 """
   2 Execute ceph-deploy as a task
   3 """
   4 from cStringIO import StringIO
   5
   6 import contextlib
   7 import os
   8 import time
   9 import logging
  10 import traceback
  11
  12 from teuthology import misc as teuthology
  13 from teuthology import contextutil
  14 from teuthology.config import config as teuth_config
  15 from teuthology.task import install as install_fn
  16 from teuthology.orchestra import run
  17 from tasks.cephfs.filesystem import Filesystem
  18 from teuthology.misc import wait_until_healthy
  19
  20 log = logging.getLogger(__name__)
  21
  22
  23 @contextlib.contextmanager
  24 def download_ceph_deploy(ctx, config):
  25     """
  26     Downloads ceph-deploy from the ceph.com git mirror and (by default)
  27     switches to the master branch. If the `ceph-deploy-branch` is specified, it
  28     will use that instead. The `bootstrap` script is ran, with the argument
  29     obtained from `python_version`, if specified.
  30     """
  31     # use mon.a for ceph_admin
  32     (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()
  33
  34     try:
  35         py_ver = str(config['python_version'])
  36     except KeyError:
  37         pass
  38     else:
  39         supported_versions = ['2', '3']
  40         if py_ver not in supported_versions:
  41             raise ValueError("python_version must be: {}, not {}".format(
  42                 ' or '.join(supported_versions), py_ver
  43             ))
  44
  45         log.info("Installing Python")
  46         system_type = teuthology.get_system_type(ceph_admin)
  47
  48         if system_type == 'rpm':
  49             package = 'python34' if py_ver == '3' else 'python'
  50             ctx.cluster.run(args=[
  51                 'sudo', 'yum', '-y', 'install',
  52                 package, 'python-virtualenv'
  53             ])
  54         else:
  55             package = 'python3' if py_ver == '3' else 'python'
  56             ctx.cluster.run(args=[
  57                 'sudo', 'apt-get', '-y', '--force-yes', 'install',
  58                 package, 'python-virtualenv'
  59             ])
  60
  61     log.info('Downloading ceph-deploy...')
  62     testdir = teuthology.get_testdir(ctx)
  63     ceph_deploy_branch = config.get('ceph-deploy-branch', 'master')
  64
  65     ceph_admin.run(
  66         args=[
  67             'git', 'clone', '-b', ceph_deploy_branch,
  68             teuth_config.ceph_git_base_url + 'ceph-deploy.git',
  69             '{tdir}/ceph-deploy'.format(tdir=testdir),
  70         ],
  71     )
  72     args = [
  73         'cd',
  74         '{tdir}/ceph-deploy'.format(tdir=testdir),
  75         run.Raw('&&'),
  76         './bootstrap',
  77     ]
  78     try:
  79         args.append(str(config['python_version']))
  80     except KeyError:
  81         pass
  82     ceph_admin.run(args=args)
  83
  84     try:
  85         yield
  86     finally:
  87         log.info('Removing ceph-deploy ...')
  88         ceph_admin.run(
  89             args=[
  90                 'rm',
  91                 '-rf',
  92                 '{tdir}/ceph-deploy'.format(tdir=testdir),
  93             ],
  94         )
  95
  96
  97 def is_healthy(ctx, config):
  98     """Wait until a Ceph cluster is healthy."""
  99     testdir = teuthology.get_testdir(ctx)
 100     ceph_admin = teuthology.get_first_mon(ctx, config)
 101     (remote,) = ctx.cluster.only(ceph_admin).remotes.keys()
 102     max_tries = 90  # 90 tries * 10 secs --> 15 minutes
 103     tries = 0
 104     while True:
 105         tries += 1
 106         if tries >= max_tries:
 107             msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes"
 108             remote.run(
 109                 args=[
 110                     'cd',
 111                     '{tdir}'.format(tdir=testdir),
 112                     run.Raw('&&'),
 113                     'sudo', 'ceph',
 114                     'report',
 115                 ],
 116             )
 117             raise RuntimeError(msg)
 118
 119         r = remote.run(
 120             args=[
 121                 'cd',
 122                 '{tdir}'.format(tdir=testdir),
 123                 run.Raw('&&'),
 124                 'sudo', 'ceph',
 125                 'health',
 126             ],
 127             stdout=StringIO(),
 128             logger=log.getChild('health'),
 129         )
 130         out = r.stdout.getvalue()
 131         log.info('Ceph health: %s', out.rstrip('\n'))
 132         if out.split(None, 1)[0] == 'HEALTH_OK':
 133             break
 134         time.sleep(10)
 135
 136
 137 def get_nodes_using_role(ctx, target_role):
 138     """
 139     Extract the names of nodes that match a given role from a cluster, and modify the
 140     cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy
 141     uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23".
 142     """
 143
 144     # Nodes containing a service of the specified role
 145     nodes_of_interest = []
 146
 147     # Prepare a modified version of cluster.remotes with ceph-deploy-ized names
 148     modified_remotes = {}
 149     ceph_deploy_mapped = dict()
 150     for _remote, roles_for_host in ctx.cluster.remotes.iteritems():
 151         modified_remotes[_remote] = []
 152         for svc_id in roles_for_host:
 153             if svc_id.startswith("{0}.".format(target_role)):
 154                 fqdn = str(_remote).split('@')[-1]
 155                 nodename = str(str(_remote).split('.')[0]).split('@')[1]
 156                 if target_role == 'mon':
 157                     nodes_of_interest.append(fqdn)
 158                 else:
 159                     nodes_of_interest.append(nodename)
 160                 mapped_role = "{0}.{1}".format(target_role, nodename)
 161                 modified_remotes[_remote].append(mapped_role)
 162                 # keep dict of mapped role for later use by tasks
 163                 # eg. mon.a => mon.node1
 164                 ceph_deploy_mapped[svc_id] = mapped_role
 165             else:
 166                 modified_remotes[_remote].append(svc_id)
 167
 168     ctx.cluster.remotes = modified_remotes
 169     ctx.cluster.mapped_role = ceph_deploy_mapped
 170
 171     return nodes_of_interest
 172
 173
 174 def get_dev_for_osd(ctx, config):
 175     """Get a list of all osd device names."""
 176     osd_devs = []
 177     for remote, roles_for_host in ctx.cluster.remotes.iteritems():
 178         host = remote.name.split('@')[-1]
 179         shortname = host.split('.')[0]
 180         devs = teuthology.get_scratch_devices(remote)
 181         num_osd_per_host = list(
 182             teuthology.roles_of_type(
 183                 roles_for_host, 'osd'))
 184         num_osds = len(num_osd_per_host)
 185         if config.get('separate_journal_disk') is not None:
 186             num_devs_reqd = 2 * num_osds
 187             assert num_devs_reqd <= len(
 188                 devs), 'fewer data and journal disks than required ' + shortname
 189             for dindex in range(0, num_devs_reqd, 2):
 190                 jd_index = dindex + 1
 191                 dev_short = devs[dindex].split('/')[-1]
 192                 jdev_short = devs[jd_index].split('/')[-1]
 193                 osd_devs.append((shortname, dev_short, jdev_short))
 194         else:
 195             assert num_osds <= len(devs), 'fewer disks than osds ' + shortname
 196             for dev in devs[:num_osds]:
 197                 dev_short = dev.split('/')[-1]
 198                 osd_devs.append((shortname, dev_short))
 199     return osd_devs
 200
 201
 202 def get_all_nodes(ctx, config):
 203     """Return a string of node names separated by blanks"""
 204     nodelist = []
 205     for t, k in ctx.config['targets'].iteritems():
 206         host = t.split('@')[-1]
 207         simple_host = host.split('.')[0]
 208         nodelist.append(simple_host)
 209     nodelist = " ".join(nodelist)
 210     return nodelist
 211
 212
 213 @contextlib.contextmanager
 214 def build_ceph_cluster(ctx, config):
 215     """Build a ceph cluster"""
 216
 217     # Expect to find ceph_admin on the first mon by ID, same place that the download task
 218     # puts it.  Remember this here, because subsequently IDs will change from those in
 219     # the test config to those that ceph-deploy invents.
 220
 221     (ceph_admin,) = ctx.cluster.only('mon.a').remotes.iterkeys()
 222
 223     def execute_ceph_deploy(cmd):
 224         """Remotely execute a ceph_deploy command"""
 225         return ceph_admin.run(
 226             args=[
 227                 'cd',
 228                 '{tdir}/ceph-deploy'.format(tdir=testdir),
 229                 run.Raw('&&'),
 230                 run.Raw(cmd),
 231             ],
 232             check_status=False,
 233         ).exitstatus
 234
 235     try:
 236         log.info('Building ceph cluster using ceph-deploy...')
 237         testdir = teuthology.get_testdir(ctx)
 238         ceph_branch = None
 239         if config.get('branch') is not None:
 240             cbranch = config.get('branch')
 241             for var, val in cbranch.iteritems():
 242                 ceph_branch = '--{var}={val}'.format(var=var, val=val)
 243         all_nodes = get_all_nodes(ctx, config)
 244         mds_nodes = get_nodes_using_role(ctx, 'mds')
 245         mds_nodes = " ".join(mds_nodes)
 246         mon_node = get_nodes_using_role(ctx, 'mon')
 247         mon_nodes = " ".join(mon_node)
 248         # skip mgr based on config item
 249         # this is needed when test uses latest code to install old ceph
 250         # versions
 251         skip_mgr = config.get('skip-mgr', False)
 252         if not skip_mgr:
 253             mgr_nodes = get_nodes_using_role(ctx, 'mgr')
 254             mgr_nodes = " ".join(mgr_nodes)
 255         new_mon = './ceph-deploy new' + " " + mon_nodes
 256         if not skip_mgr:
 257             mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
 258         mon_hostname = mon_nodes.split(' ')[0]
 259         mon_hostname = str(mon_hostname)
 260         gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname
 261         deploy_mds = './ceph-deploy mds create' + " " + mds_nodes
 262         no_of_osds = 0
 263
 264         if mon_nodes is None:
 265             raise RuntimeError("no monitor nodes in the config file")
 266
 267         estatus_new = execute_ceph_deploy(new_mon)
 268         if estatus_new != 0:
 269             raise RuntimeError("ceph-deploy: new command failed")
 270
 271         log.info('adding config inputs...')
 272         testdir = teuthology.get_testdir(ctx)
 273         conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir)
 274
 275         if config.get('conf') is not None:
 276             confp = config.get('conf')
 277             for section, keys in confp.iteritems():
 278                 lines = '[{section}]\n'.format(section=section)
 279                 teuthology.append_lines_to_file(ceph_admin, conf_path, lines,
 280                                                 sudo=True)
 281                 for key, value in keys.iteritems():
 282                     log.info("[%s] %s = %s" % (section, key, value))
 283                     lines = '{key} = {value}\n'.format(key=key, value=value)
 284                     teuthology.append_lines_to_file(
 285                         ceph_admin, conf_path, lines, sudo=True)
 286
 287         # install ceph
 288         dev_branch = ctx.config['branch']
 289         branch = '--dev={branch}'.format(branch=dev_branch)
 290         if ceph_branch:
 291             option = ceph_branch
 292         else:
 293             option = branch
 294         install_nodes = './ceph-deploy install ' + option + " " + all_nodes
 295         estatus_install = execute_ceph_deploy(install_nodes)
 296         if estatus_install != 0:
 297             raise RuntimeError("ceph-deploy: Failed to install ceph")
 298         # install ceph-test package too
 299         install_nodes2 = './ceph-deploy install --tests ' + option + \
 300                          " " + all_nodes
 301         estatus_install = execute_ceph_deploy(install_nodes2)
 302         if estatus_install != 0:
 303             raise RuntimeError("ceph-deploy: Failed to install ceph-test")
 304
 305         mon_create_nodes = './ceph-deploy mon create-initial'
 306         # If the following fails, it is OK, it might just be that the monitors
 307         # are taking way more than a minute/monitor to form quorum, so lets
 308         # try the next block which will wait up to 15 minutes to gatherkeys.
 309         execute_ceph_deploy(mon_create_nodes)
 310
 311         # create-keys is explicit now
 312         # http://tracker.ceph.com/issues/16036
 313         mons = ctx.cluster.only(teuthology.is_type('mon'))
 314         for remote in mons.remotes.iterkeys():
 315             remote.run(args=['sudo', 'ceph-create-keys', '--cluster', 'ceph',
 316                              '--id', remote.shortname])
 317
 318         estatus_gather = execute_ceph_deploy(gather_keys)
 319
 320         if not skip_mgr:
 321             execute_ceph_deploy(mgr_create)
 322
 323         if mds_nodes:
 324             estatus_mds = execute_ceph_deploy(deploy_mds)
 325             if estatus_mds != 0:
 326                 raise RuntimeError("ceph-deploy: Failed to deploy mds")
 327
 328         if config.get('test_mon_destroy') is not None:
 329             for d in range(1, len(mon_node)):
 330                 mon_destroy_nodes = './ceph-deploy mon destroy' + \
 331                     " " + mon_node[d]
 332                 estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes)
 333                 if estatus_mon_d != 0:
 334                     raise RuntimeError("ceph-deploy: Failed to delete monitor")
 335
 336         node_dev_list = get_dev_for_osd(ctx, config)
 337         for d in node_dev_list:
 338             node = d[0]
 339             for disk in d[1:]:
 340                 zap = './ceph-deploy disk zap ' + node + ':' + disk
 341                 estatus = execute_ceph_deploy(zap)
 342                 if estatus != 0:
 343                     raise RuntimeError("ceph-deploy: Failed to zap osds")
 344             osd_create_cmd = './ceph-deploy osd create '
 345             # first check for filestore, default is bluestore with ceph-deploy
 346             if config.get('filestore') is not None:
 347                 osd_create_cmd += '--filestore '
 348             elif config.get('bluestore') is not None:
 349                 osd_create_cmd += '--bluestore '
 350             if config.get('dmcrypt') is not None:
 351                 osd_create_cmd += '--dmcrypt '
 352             osd_create_cmd += ":".join(d)
 353             estatus_osd = execute_ceph_deploy(osd_create_cmd)
 354             if estatus_osd == 0:
 355                 log.info('successfully created osd')
 356                 no_of_osds += 1
 357             else:
 358                 raise RuntimeError("ceph-deploy: Failed to create osds")
 359
 360         if config.get('wait-for-healthy', True) and no_of_osds >= 2:
 361             is_healthy(ctx=ctx, config=None)
 362
 363             log.info('Setting up client nodes...')
 364             conf_path = '/etc/ceph/ceph.conf'
 365             admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring'
 366             first_mon = teuthology.get_first_mon(ctx, config)
 367             (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys()
 368             conf_data = teuthology.get_file(
 369                 remote=mon0_remote,
 370                 path=conf_path,
 371                 sudo=True,
 372             )
 373             admin_keyring = teuthology.get_file(
 374                 remote=mon0_remote,
 375                 path=admin_keyring_path,
 376                 sudo=True,
 377             )
 378
 379             clients = ctx.cluster.only(teuthology.is_type('client'))
 380             for remot, roles_for_host in clients.remotes.iteritems():
 381                 for id_ in teuthology.roles_of_type(roles_for_host, 'client'):
 382                     client_keyring = \
 383                         '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_)
 384                     mon0_remote.run(
 385                         args=[
 386                             'cd',
 387                             '{tdir}'.format(tdir=testdir),
 388                             run.Raw('&&'),
 389                             'sudo', 'bash', '-c',
 390                             run.Raw('"'), 'ceph',
 391                             'auth',
 392                             'get-or-create',
 393                             'client.{id}'.format(id=id_),
 394                             'mds', 'allow',
 395                             'mon', 'allow *',
 396                             'osd', 'allow *',
 397                             run.Raw('>'),
 398                             client_keyring,
 399                             run.Raw('"'),
 400                         ],
 401                     )
 402                     key_data = teuthology.get_file(
 403                         remote=mon0_remote,
 404                         path=client_keyring,
 405                         sudo=True,
 406                     )
 407                     teuthology.sudo_write_file(
 408                         remote=remot,
 409                         path=client_keyring,
 410                         data=key_data,
 411                         perms='0644'
 412                     )
 413                     teuthology.sudo_write_file(
 414                         remote=remot,
 415                         path=admin_keyring_path,
 416                         data=admin_keyring,
 417                         perms='0644'
 418                     )
 419                     teuthology.sudo_write_file(
 420                         remote=remot,
 421                         path=conf_path,
 422                         data=conf_data,
 423                         perms='0644'
 424                     )
 425
 426             if mds_nodes:
 427                 log.info('Configuring CephFS...')
 428                 Filesystem(ctx, create=True)
 429         elif not config.get('only_mon'):
 430             raise RuntimeError(
 431                 "The cluster is NOT operational due to insufficient OSDs")
 432         yield
 433
 434     except Exception:
 435         log.info(
 436             "Error encountered, logging exception before tearing down ceph-deploy")
 437         log.info(traceback.format_exc())
 438         raise
 439     finally:
 440         if config.get('keep_running'):
 441             return
 442         log.info('Stopping ceph...')
 443         ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
 444                               'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
 445                               'sudo', 'systemctl', 'stop', 'ceph.target'])
 446
 447         # Are you really not running anymore?
 448         # try first with the init tooling
 449         # ignoring the status so this becomes informational only
 450         ctx.cluster.run(
 451             args=[
 452                 'sudo', 'status', 'ceph-all', run.Raw('||'),
 453                 'sudo', 'service', 'ceph', 'status', run.Raw('||'),
 454                 'sudo', 'systemctl', 'status', 'ceph.target'],
 455             check_status=False)
 456
 457         # and now just check for the processes themselves, as if upstart/sysvinit
 458         # is lying to us. Ignore errors if the grep fails
 459         ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'),
 460                               'grep', '-v', 'grep', run.Raw('|'),
 461                               'grep', 'ceph'], check_status=False)
 462
 463         if ctx.archive is not None:
 464             # archive mon data, too
 465             log.info('Archiving mon data...')
 466             path = os.path.join(ctx.archive, 'data')
 467             os.makedirs(path)
 468             mons = ctx.cluster.only(teuthology.is_type('mon'))
 469             for remote, roles in mons.remotes.iteritems():
 470                 for role in roles:
 471                     if role.startswith('mon.'):
 472                         teuthology.pull_directory_tarball(
 473                             remote,
 474                             '/var/lib/ceph/mon',
 475                             path + '/' + role + '.tgz')
 476
 477             log.info('Compressing logs...')
 478             run.wait(
 479                 ctx.cluster.run(
 480                     args=[
 481                         'sudo',
 482                         'find',
 483                         '/var/log/ceph',
 484                         '-name',
 485                         '*.log',
 486                         '-print0',
 487                         run.Raw('|'),
 488                         'sudo',
 489                         'xargs',
 490                         '-0',
 491                         '--no-run-if-empty',
 492                         '--',
 493                         'gzip',
 494                         '--',
 495                     ],
 496                     wait=False,
 497                 ),
 498             )
 499
 500             log.info('Archiving logs...')
 501             path = os.path.join(ctx.archive, 'remote')
 502             os.makedirs(path)
 503             for remote in ctx.cluster.remotes.iterkeys():
 504                 sub = os.path.join(path, remote.shortname)
 505                 os.makedirs(sub)
 506                 teuthology.pull_directory(remote, '/var/log/ceph',
 507                                           os.path.join(sub, 'log'))
 508
 509         # Prevent these from being undefined if the try block fails
 510         all_nodes = get_all_nodes(ctx, config)
 511         purge_nodes = './ceph-deploy purge' + " " + all_nodes
 512         purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes
 513
 514         log.info('Purging package...')
 515         execute_ceph_deploy(purge_nodes)
 516         log.info('Purging data...')
 517         execute_ceph_deploy(purgedata_nodes)
 518
 519
 520 @contextlib.contextmanager
 521 def cli_test(ctx, config):
 522     """
 523      ceph-deploy cli to exercise most commonly use cli's and ensure
 524      all commands works and also startup the init system.
 525
 526     """
 527     log.info('Ceph-deploy Test')
 528     if config is None:
 529         config = {}
 530     test_branch = ''
 531     conf_dir = teuthology.get_testdir(ctx) + "/cdtest"
 532
 533     def execute_cdeploy(admin, cmd, path):
 534         """Execute ceph-deploy commands """
 535         """Either use git path or repo path """
 536         args = ['cd', conf_dir, run.Raw(';')]
 537         if path:
 538             args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path))
 539         else:
 540             args.append('ceph-deploy')
 541         args.append(run.Raw(cmd))
 542         ec = admin.run(args=args, check_status=False).exitstatus
 543         if ec != 0:
 544             raise RuntimeError(
 545                 "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec))
 546
 547     if config.get('rhbuild'):
 548         path = None
 549     else:
 550         path = teuthology.get_testdir(ctx)
 551         # test on branch from config eg: wip-* , master or next etc
 552         # packages for all distro's should exist for wip*
 553         if ctx.config.get('branch'):
 554             branch = ctx.config.get('branch')
 555             test_branch = ' --dev={branch} '.format(branch=branch)
 556     mons = ctx.cluster.only(teuthology.is_type('mon'))
 557     for node, role in mons.remotes.iteritems():
 558         admin = node
 559         admin.run(args=['mkdir', conf_dir], check_status=False)
 560         nodename = admin.shortname
 561     system_type = teuthology.get_system_type(admin)
 562     if config.get('rhbuild'):
 563         admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y'])
 564     log.info('system type is %s', system_type)
 565     osds = ctx.cluster.only(teuthology.is_type('osd'))
 566
 567     for remote, roles in osds.remotes.iteritems():
 568         devs = teuthology.get_scratch_devices(remote)
 569         log.info("roles %s", roles)
 570         if (len(devs) < 3):
 571             log.error(
 572                 'Test needs minimum of 3 devices, only found %s',
 573                 str(devs))
 574             raise RuntimeError("Needs minimum of 3 devices ")
 575
 576     conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir)
 577     new_cmd = 'new ' + nodename
 578     execute_cdeploy(admin, new_cmd, path)
 579     if config.get('conf') is not None:
 580         confp = config.get('conf')
 581         for section, keys in confp.iteritems():
 582             lines = '[{section}]\n'.format(section=section)
 583             teuthology.append_lines_to_file(admin, conf_path, lines,
 584                                             sudo=True)
 585             for key, value in keys.iteritems():
 586                 log.info("[%s] %s = %s" % (section, key, value))
 587                 lines = '{key} = {value}\n'.format(key=key, value=value)
 588                 teuthology.append_lines_to_file(admin, conf_path, lines,
 589                                                 sudo=True)
 590     new_mon_install = 'install {branch} --mon '.format(
 591         branch=test_branch) + nodename
 592     new_mgr_install = 'install {branch} --mgr '.format(
 593         branch=test_branch) + nodename
 594     new_osd_install = 'install {branch} --osd '.format(
 595         branch=test_branch) + nodename
 596     new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename
 597     create_initial = 'mon create-initial '
 598     # either use create-keys or push command
 599     push_keys = 'admin ' + nodename
 600     execute_cdeploy(admin, new_mon_install, path)
 601     execute_cdeploy(admin, new_mgr_install, path)
 602     execute_cdeploy(admin, new_osd_install, path)
 603     execute_cdeploy(admin, new_admin, path)
 604     execute_cdeploy(admin, create_initial, path)
 605     execute_cdeploy(admin, push_keys, path)
 606
 607     for i in range(3):
 608         zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i])
 609         prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i])
 610         execute_cdeploy(admin, zap_disk, path)
 611         execute_cdeploy(admin, prepare, path)
 612
 613     log.info("list files for debugging purpose to check file permissions")
 614     admin.run(args=['ls', run.Raw('-lt'), conf_dir])
 615     remote.run(args=['sudo', 'ceph', '-s'], check_status=False)
 616     r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
 617     out = r.stdout.getvalue()
 618     log.info('Ceph health: %s', out.rstrip('\n'))
 619     log.info("Waiting for cluster to become healthy")
 620     with contextutil.safe_while(sleep=10, tries=6,
 621                                 action='check health') as proceed:
 622         while proceed():
 623             r = remote.run(args=['sudo', 'ceph', 'health'], stdout=StringIO())
 624             out = r.stdout.getvalue()
 625             if (out.split(None, 1)[0] == 'HEALTH_OK'):
 626                 break
 627     rgw_install = 'install {branch} --rgw {node}'.format(
 628         branch=test_branch,
 629         node=nodename,
 630     )
 631     rgw_create = 'rgw create ' + nodename
 632     execute_cdeploy(admin, rgw_install, path)
 633     execute_cdeploy(admin, rgw_create, path)
 634     log.info('All ceph-deploy cli tests passed')
 635     try:
 636         yield
 637     finally:
 638         log.info("cleaning up")
 639         ctx.cluster.run(args=['sudo', 'stop', 'ceph-all', run.Raw('||'),
 640                               'sudo', 'service', 'ceph', 'stop', run.Raw('||'),
 641                               'sudo', 'systemctl', 'stop', 'ceph.target'],
 642                         check_status=False)
 643         time.sleep(4)
 644         for i in range(3):
 645             umount_dev = "{d}1".format(d=devs[i])
 646             r = remote.run(args=['sudo', 'umount', run.Raw(umount_dev)])
 647         cmd = 'purge ' + nodename
 648         execute_cdeploy(admin, cmd, path)
 649         cmd = 'purgedata ' + nodename
 650         execute_cdeploy(admin, cmd, path)
 651         log.info("Removing temporary dir")
 652         admin.run(
 653             args=[
 654                 'rm',
 655                 run.Raw('-rf'),
 656                 run.Raw(conf_dir)],
 657             check_status=False)
 658         if config.get('rhbuild'):
 659             admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y'])
 660
 661
 662 @contextlib.contextmanager
 663 def single_node_test(ctx, config):
 664     """
 665     - ceph-deploy.single_node_test: null
 666
 667     #rhbuild testing
 668     - ceph-deploy.single_node_test:
 669         rhbuild: 1.2.3
 670
 671     """
 672     log.info("Testing ceph-deploy on single node")
 673     if config is None:
 674         config = {}
 675     overrides = ctx.config.get('overrides', {})
 676     teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
 677
 678     if config.get('rhbuild'):
 679         log.info("RH Build, Skip Download")
 680         with contextutil.nested(
 681             lambda: cli_test(ctx=ctx, config=config),
 682         ):
 683             yield
 684     else:
 685         with contextutil.nested(
 686             lambda: install_fn.ship_utilities(ctx=ctx, config=None),
 687             lambda: download_ceph_deploy(ctx=ctx, config=config),
 688             lambda: cli_test(ctx=ctx, config=config),
 689         ):
 690             yield
 691
 692
 693 @contextlib.contextmanager
 694 def upgrade(ctx, config):
 695     """
 696      Upgrade using ceph-deploy
 697      eg:
 698        ceph-deploy.upgrade:
 699           # to upgrade to specific branch, use
 700           branch:
 701              stable: jewel
 702            # to setup mgr node, use
 703            setup-mgr-node: True
 704            # to wait for cluster to be healthy after all upgrade, use
 705            wait-for-healthy: True
 706            role: (upgrades the below roles serially)
 707               mon.a
 708               mon.b
 709               osd.0
 710      """
 711     roles = config.get('roles')
 712     # get the roles that are mapped as per ceph-deploy
 713     # roles are mapped for mon/mds eg: mon.a  => mon.host_short_name
 714     mapped_role = ctx.cluster.mapped_role
 715     if config.get('branch'):
 716         branch = config.get('branch')
 717         (var, val) = branch.items()[0]
 718         ceph_branch = '--{var}={val}'.format(var=var, val=val)
 719     else:
 720         # default to master
 721         ceph_branch = '--dev=master'
 722     # get the node used for initial deployment which is mon.a
 723     mon_a = mapped_role.get('mon.a')
 724     (ceph_admin,) = ctx.cluster.only(mon_a).remotes.iterkeys()
 725     testdir = teuthology.get_testdir(ctx)
 726     cmd = './ceph-deploy install ' + ceph_branch
 727     for role in roles:
 728         # check if this role is mapped (mon or mds)
 729         if mapped_role.get(role):
 730             role = mapped_role.get(role)
 731         remotes_and_roles = ctx.cluster.only(role).remotes
 732         for remote, roles in remotes_and_roles.iteritems():
 733             nodename = remote.shortname
 734             cmd = cmd + ' ' + nodename
 735             log.info("Upgrading ceph on  %s", nodename)
 736             ceph_admin.run(
 737                 args=[
 738                     'cd',
 739                     '{tdir}/ceph-deploy'.format(tdir=testdir),
 740                     run.Raw('&&'),
 741                     run.Raw(cmd),
 742                 ],
 743             )
 744             # restart all ceph services, ideally upgrade should but it does not
 745             remote.run(
 746                 args=[
 747                     'sudo', 'systemctl', 'restart', 'ceph.target'
 748                 ]
 749             )
 750             ceph_admin.run(args=['sudo', 'ceph', '-s'])
 751
 752     # workaround for http://tracker.ceph.com/issues/20950
 753     # write the correct mgr key to disk
 754     if config.get('setup-mgr-node', None):
 755         mons = ctx.cluster.only(teuthology.is_type('mon'))
 756         for remote, roles in mons.remotes.iteritems():
 757             remote.run(
 758                 args=[
 759                     run.Raw('sudo ceph auth get client.bootstrap-mgr'),
 760                     run.Raw('|'),
 761                     run.Raw('sudo tee'),
 762                     run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring')
 763                 ]
 764             )
 765
 766     if config.get('setup-mgr-node', None):
 767         mgr_nodes = get_nodes_using_role(ctx, 'mgr')
 768         mgr_nodes = " ".join(mgr_nodes)
 769         mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes
 770         mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes
 771         # install mgr
 772         ceph_admin.run(
 773             args=[
 774                 'cd',
 775                 '{tdir}/ceph-deploy'.format(tdir=testdir),
 776                 run.Raw('&&'),
 777                 run.Raw(mgr_install),
 778                 ],
 779             )
 780         # create mgr
 781         ceph_admin.run(
 782             args=[
 783                 'cd',
 784                 '{tdir}/ceph-deploy'.format(tdir=testdir),
 785                 run.Raw('&&'),
 786                 run.Raw(mgr_create),
 787                 ],
 788             )
 789         ceph_admin.run(args=['sudo', 'ceph', '-s'])
 790     if config.get('wait-for-healthy', None):
 791         wait_until_healthy(ctx, ceph_admin, use_sudo=True)
 792     yield
 793
 794
 795 @contextlib.contextmanager
 796 def task(ctx, config):
 797     """
 798     Set up and tear down a Ceph cluster.
 799
 800     For example::
 801
 802         tasks:
 803         - install:
 804              extras: yes
 805         - ssh_keys:
 806         - ceph-deploy:
 807              branch:
 808                 stable: bobtail
 809              mon_initial_members: 1
 810              ceph-deploy-branch: my-ceph-deploy-branch
 811              only_mon: true
 812              keep_running: true
 813              # either choose bluestore or filestore, default is bluestore
 814              bluestore: True
 815              # or
 816              filestore: True
 817              # skip install of mgr for old release using below flag
 818              skip-mgr: True  ( default is False )
 819
 820         tasks:
 821         - install:
 822              extras: yes
 823         - ssh_keys:
 824         - ceph-deploy:
 825              branch:
 826                 dev: master
 827              conf:
 828                 mon:
 829                    debug mon = 20
 830
 831         tasks:
 832         - install:
 833              extras: yes
 834         - ssh_keys:
 835         - ceph-deploy:
 836              branch:
 837                 testing:
 838              dmcrypt: yes
 839              separate_journal_disk: yes
 840
 841     """
 842     if config is None:
 843         config = {}
 844
 845     assert isinstance(config, dict), \
 846         "task ceph-deploy only supports a dictionary for configuration"
 847
 848     overrides = ctx.config.get('overrides', {})
 849     teuthology.deep_merge(config, overrides.get('ceph-deploy', {}))
 850
 851     if config.get('branch') is not None:
 852         assert isinstance(
 853             config['branch'], dict), 'branch must be a dictionary'
 854
 855     log.info('task ceph-deploy with config ' + str(config))
 856
 857     with contextutil.nested(
 858         lambda: install_fn.ship_utilities(ctx=ctx, config=None),
 859         lambda: download_ceph_deploy(ctx=ctx, config=config),
 860         lambda: build_ceph_cluster(ctx=ctx, config=config),
 861     ):
 862         yield