3 # Copyright (C) 2015, 2016, 2017 Red Hat <contact@redhat.com>
4 # Copyright (C) 2014 Inktank <info@inktank.com>
5 # Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com>
6 # Copyright (C) 2014 Catalyst.net Ltd
8 # Author: Loic Dachary <loic@dachary.org>
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU Library Public License as published by
12 # the Free Software Foundation; either version 2, or (at your option)
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Library Public License for more details.
21 from __future__ import print_function
46 CEPH_OSD_ONDISK_MAGIC = 'ceph osd volume v026'
47 CEPH_LOCKBOX_ONDISK_MAGIC = 'ceph lockbox volume v001'
49 KEY_MANAGEMENT_MODE_V1 = 'ceph-mon v1'
54 # identical because creating a journal is atomic
55 'ready': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
56 'tobe': '45b0969e-9b03-4f30-b4c6-b4b80ceff106',
59 # identical because creating a block is atomic
60 'ready': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
61 'tobe': 'cafecafe-9b03-4f30-b4c6-b4b80ceff106',
64 # identical because creating a block is atomic
65 'ready': '30cd0809-c2b2-499c-8879-2d6b78529876',
66 'tobe': '30cd0809-c2b2-499c-8879-2d6b785292be',
69 # identical because creating a block is atomic
70 'ready': '5ce17fce-4087-4169-b7ff-056cc58473f9',
71 'tobe': '5ce17fce-4087-4169-b7ff-056cc58472be',
74 'ready': '4fbd7e29-9d25-41b8-afd0-062c0ceff05d',
75 'tobe': '89c57f98-2fe5-4dc0-89c1-f3ad0ceff2be',
78 'ready': 'fb3aabf9-d25f-47cc-bf5e-721d1816496b',
79 'tobe': 'fb3aabf9-d25f-47cc-bf5e-721d181642be',
84 'ready': '45b0969e-9b03-4f30-b4c6-35865ceff106',
85 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
88 'ready': 'cafecafe-9b03-4f30-b4c6-35865ceff106',
89 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
92 'ready': '166418da-c469-4022-adf4-b30afd37f176',
93 'tobe': '7521c784-4626-4260-bc8d-ba77a0f5f2be',
96 'ready': '86a32090-3647-40b9-bbbd-38d8c573aa86',
97 'tobe': '92dad30f-175b-4d40-a5b0-5c0a258b42be',
100 'ready': '4fbd7e29-9d25-41b8-afd0-35865ceff05d',
101 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
106 'ready': '45b0969e-9b03-4f30-b4c6-5ec00ceff106',
107 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
110 'ready': 'cafecafe-9b03-4f30-b4c6-5ec00ceff106',
111 'tobe': '89c57f98-2fe5-4dc0-89c1-35865ceff2be',
114 'ready': '93b0052d-02d9-4d8a-a43b-33a3ee4dfbc3',
115 'tobe': '69d17c68-3e58-4399-aff0-b68265f2e2be',
118 'ready': '306e8683-4fe2-4330-b7c0-00a917c16966',
119 'tobe': 'f2d89683-a621-4063-964a-eb1f7863a2be',
122 'ready': '4fbd7e29-9d25-41b8-afd0-5ec00ceff05d',
123 'tobe': '89c57f98-2fe5-4dc0-89c1-5ec00ceff2be',
128 'ready': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
129 'tobe': '45b0969e-8ae0-4982-bf9d-5a8d867af560',
132 'ready': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
133 'tobe': 'cafecafe-8ae0-4982-bf9d-5a8d867af560',
136 'ready': 'ec6d6385-e346-45dc-be91-da2a7c8b3261',
137 'tobe': 'ec6d6385-e346-45dc-be91-da2a7c8b32be',
140 'ready': '01b41e1b-002a-453c-9f17-88793989ff8f',
141 'tobe': '01b41e1b-002a-453c-9f17-88793989f2be',
144 'ready': '4fbd7e29-8ae0-4982-bf9d-5a8d867af560',
145 'tobe': '89c57f98-8ae0-4982-bf9d-5a8d867af560',
148 'ready': '7f4a666a-16f3-47a2-8445-152ef4d03f6c',
149 'tobe': '7f4a666a-16f3-47a2-8445-152ef4d032be',
155 # see https://bugs.python.org/issue23098
157 except OverflowError:
158 os.major = lambda devid: ((devid >> 8) & 0xfff) | ((devid >> 32) & ~0xfff)
159 os.minor = lambda devid: (devid & 0xff) | ((devid >> 12) & ~0xff)
def get_ready_by_type(what):
    """
    Collect the 'ready' identifiers registered under one PTYPE group.

    :param what: key into PTYPE; a KeyError propagates if it is absent
    :return: list holding the 'ready' value of every entry in PTYPE[what]
    """
    ready_values = []
    for entry in PTYPE[what].values():
        ready_values.append(entry['ready'])
    return ready_values
def get_ready_by_name(name):
    """
    Collect the 'ready' identifier recorded for ``name`` in every PTYPE
    group that defines it.

    :param name: entry name to look up inside each PTYPE group
    :return: list of 'ready' values; groups lacking ``name`` are skipped
    """
    ready_values = []
    for group in PTYPE.values():
        if name in group:
            ready_values.append(group[name]['ready'])
    return ready_values
def is_regular_space(ptype):
    """
    Ask Ptype.is_what_space whether ``ptype`` belongs to the 'regular' group.
    """
    group = 'regular'
    return Ptype.is_what_space(group, ptype)
def is_mpath_space(ptype):
    """
    Ask Ptype.is_what_space whether ``ptype`` belongs to the 'mpath' group.
    """
    group = 'mpath'
    return Ptype.is_what_space(group, ptype)
def is_plain_space(ptype):
    """
    Ask Ptype.is_what_space whether ``ptype`` belongs to the 'plain' group.
    """
    group = 'plain'
    return Ptype.is_what_space(group, ptype)
def is_luks_space(ptype):
    """
    Ask Ptype.is_what_space whether ``ptype`` belongs to the 'luks' group.
    """
    group = 'luks'
    return Ptype.is_what_space(group, ptype)
189 def is_what_space(what, ptype):
190 for name in Space.NAMES:
191 if ptype == PTYPE[what][name]['ready']:
196 def space_ptype_to_name(ptype):
197 for what in PTYPE.values():
198 for name in Space.NAMES:
199 if ptype == what[name]['ready']:
201 raise ValueError('ptype ' + ptype + ' not found')
204 def is_dmcrypt_space(ptype):
205 for name in Space.NAMES:
206 if Ptype.is_dmcrypt(ptype, name):
211 def is_dmcrypt(ptype, name):
212 for what in ('plain', 'luks'):
213 if ptype == PTYPE[what][name]['ready']:
220 if platform.system() == 'FreeBSD':
222 DEFAULT_FS_TYPE = 'zfs'
223 PROCDIR = '/compat/linux/proc'
224 # FreeBSD does not have blockdevices any more
229 DEFAULT_FS_TYPE = 'xfs'
231 BLOCKDIR = '/sys/block'
235 OSD STATUS Definition
237 OSD_STATUS_OUT_DOWN = 0
238 OSD_STATUS_OUT_UP = 1
239 OSD_STATUS_IN_DOWN = 2
242 MOUNT_OPTIONS = dict(
243 btrfs='noatime,user_subvol_rm_allowed',
244 # user_xattr is default ever since linux 2.6.39 / 3.0, but we'll
245 # delay a moment before removing it fully because we did have some
# issues with ext4 before the xattrs-in-leveldb work, and it seemed
247 # that user_xattr helped
248 ext4='noatime,user_xattr',
249 xfs='noatime,inode64',
254 # btrfs requires -f, for the same reason as xfs (see comment below)
261 # xfs insists on not overwriting previous fs; even if we wipe
262 # partition table, we often recreate it exactly the same way,
263 # so we'll see ghosts of filesystems past
282 STATEDIR = '/var/lib/ceph'
284 SYSCONFDIR = '/etc/ceph'
288 SUPPRESS_PREFIX = None
290 # only warn once about some things
293 # Nuke the TERM variable to avoid confusing any subprocesses we call.
294 # For example, libreadline will print weird control sequences for some
296 if 'TERM' in os.environ:
297 del os.environ['TERM']
300 if LOG_NAME == '__main__':
301 LOG_NAME = os.path.basename(sys.argv[0])
302 LOG = logging.getLogger(LOG_NAME)
304 # Allow user-preferred values for subprocess user and group
305 CEPH_PREF_USER = None
306 CEPH_PREF_GROUP = None
309 class FileLock(object):
310 def __init__(self, fn):
316 self.fd = os.open(self.fn, os.O_WRONLY | os.O_CREAT)
317 fcntl.lockf(self.fd, fcntl.LOCK_EX)
319 def __exit__(self, exc_type, exc_val, exc_tb):
321 fcntl.lockf(self.fd, fcntl.LOCK_UN)
326 class Error(Exception):
332 doc = _bytes2str(self.__doc__.strip())
334 str_type = basestring
337 args = [a if isinstance(a, str_type) else str(a) for a in self.args]
338 return ': '.join([doc] + [_bytes2str(a) for a in args])
341 class MountError(Error):
343 Mounting filesystem failed
347 class UnmountError(Error):
349 Unmounting filesystem failed
353 class BadMagicError(Error):
355 Does not look like a Ceph OSD, or incompatible version
359 class TruncatedLineError(Error):
365 class TooManyLinesError(Error):
371 class FilesystemTypeError(Error):
373 Cannot discover filesystem type
377 class CephDiskException(Exception):
379 A base exception for ceph-disk to provide custom (ad-hoc) messages that
380 will be caught and dealt with when main() is executed
385 class ExecutableNotFound(CephDiskException):
387 Exception to report on executables not available in PATH
394 Detect whether systemd is running
396 with open(PROCDIR + '/1/comm', 'r') as f:
397 return 'systemd' in f.read()
402 Detect whether upstart is running
404 (out, err, _) = command(['init', '--version'])
405 return 'upstart' in out
408 def maybe_mkdir(*a, **kw):
410 Creates a new directory if it doesn't exist, removes
411 existing symlink before creating the directory.
413 # remove any symlink, if it is there..
414 if os.path.exists(*a) and stat.S_ISLNK(os.lstat(*a).st_mode):
415 LOG.debug('Removing old symlink at %s', *a)
420 if e.errno == errno.EEXIST:
426 def which(executable):
427 """find the location of an executable"""
428 envpath = os.environ.get('PATH') or os.defpath
429 PATH = envpath.split(os.pathsep)
440 for location in locations:
441 executable_path = os.path.join(location, executable)
442 if (os.path.isfile(executable_path) and
443 os.access(executable_path, os.X_OK)):
444 return executable_path
447 def _get_command_executable(arguments):
449 Return the full path for an executable, raise if the executable is not
450 found. If the executable has already a full path do not perform any checks.
452 if os.path.isabs(arguments[0]): # an absolute path
454 executable = which(arguments[0])
456 command_msg = 'Could not run command: %s' % ' '.join(arguments)
457 executable_msg = '%s not in path.' % arguments[0]
458 raise ExecutableNotFound('%s %s' % (executable_msg, command_msg))
460 # swap the old executable for the new one
461 arguments[0] = executable
465 def command(arguments, **kwargs):
467 Safely execute a ``subprocess.Popen`` call making sure that the
468 executable exists and raising a helpful error message
471 .. note:: This should be the preferred way of calling ``subprocess.Popen``
472 since it provides the caller with the safety net of making sure that
473 executables *will* be found and will error nicely otherwise.
475 This returns the output of the command and the return code of the
476 process in a tuple: (stdout, stderr, returncode).
479 arguments = list(map(_bytes2str, _get_command_executable(arguments)))
481 LOG.info('Running command: %s' % ' '.join(arguments))
482 process = subprocess.Popen(
484 stdout=subprocess.PIPE,
485 stderr=subprocess.PIPE,
487 out, err = process.communicate()
489 return _bytes2str(out), _bytes2str(err), process.returncode
492 def command_with_stdin(arguments, stdin):
493 LOG.info("Running command with stdin: " + " ".join(arguments))
494 process = subprocess.Popen(
496 stdin=subprocess.PIPE,
497 stdout=subprocess.PIPE,
498 stderr=subprocess.PIPE)
499 out, err = process.communicate(stdin)
501 if process.returncode != 0:
504 "'{cmd}' failed with status code {returncode}".format(
506 returncode=process.returncode,
512 def _bytes2str(string):
513 return string.decode('utf-8') if isinstance(string, bytes) else string
516 def command_init(arguments, **kwargs):
518 Safely execute a non-blocking ``subprocess.Popen`` call
519 making sure that the executable exists and raising a helpful
520 error message if it does not.
522 .. note:: This should be the preferred way of calling ``subprocess.Popen``
523 since it provides the caller with the safety net of making sure that
524 executables *will* be found and will error nicely otherwise.
526 This returns the process.
529 arguments = list(map(_bytes2str, _get_command_executable(arguments)))
531 LOG.info('Running command: %s' % ' '.join(arguments))
532 process = subprocess.Popen(
534 stdout=subprocess.PIPE,
535 stderr=subprocess.PIPE,
def command_wait(process):
    """
    Wait for the process to finish and collect its output.

    :param process: object providing ``communicate()`` and ``returncode``
    :return: tuple (stdout, stderr, returncode), with stdout and stderr
             passed through ``_bytes2str``
    """
    stdout_data, stderr_data = process.communicate()
    decoded_out = _bytes2str(stdout_data)
    decoded_err = _bytes2str(stderr_data)
    return decoded_out, decoded_err, process.returncode
550 def command_check_call(arguments, exit=False):
552 Safely execute a ``subprocess.check_call`` call making sure that the
553 executable exists and raising a helpful error message if it does not.
555 When ``exit`` is set to ``True`` this helper will do a clean (sans
556 traceback) system exit.
557 .. note:: This should be the preferred way of calling
558 ``subprocess.check_call`` since it provides the caller with the safety net
559 of making sure that executables *will* be found and will error nicely
562 arguments = _get_command_executable(arguments)
563 command = ' '.join(arguments)
564 LOG.info('Running command: %s', command)
566 return subprocess.check_call(arguments)
567 except subprocess.CalledProcessError as error:
570 LOG.error(error.output)
572 "'{cmd}' failed with status code {returncode}".format(
574 returncode=error.returncode,
581 # An alternative block_path implementation would be
583 # name = basename(dev)
584 # return /sys/devices/virtual/block/$name
586 # It is however more fragile because it relies on the fact
587 # that the basename of the device the user will use always
588 # matches the one the driver will use. On Ubuntu 14.04, for
589 # instance, when multipath creates a partition table on
591 # /dev/mapper/353333330000007d0 -> ../dm-0
593 # it will create partition devices named
595 # /dev/mapper/353333330000007d0-part1
597 # which is the same device as /dev/dm-1 but not a symbolic
600 # ubuntu@other:~$ ls -l /dev/mapper /dev/dm-1
601 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 /dev/dm-1
602 # lrwxrwxrwx 1 root root 7 Aug 15 17:52 353333330000007d0 -> ../dm-0
603 # brw-rw---- 1 root disk 252, 1 Aug 15 17:52 353333330000007d0-part1
605 # Using the basename in this case fails.
612 path = os.path.realpath(dev)
613 rdev = os.stat(path).st_rdev
614 (M, m) = (os.major(rdev), os.minor(rdev))
615 return "{sysfs}/dev/block/{M}:{m}".format(sysfs=SYSFS, M=M, m=m)
618 def get_dm_uuid(dev):
619 uuid_path = os.path.join(block_path(dev), 'dm', 'uuid')
620 LOG.debug("get_dm_uuid " + dev + " uuid path is " + uuid_path)
621 if not os.path.exists(uuid_path):
623 uuid = open(uuid_path, 'r').read()
624 LOG.debug("get_dm_uuid " + dev + " uuid is " + uuid)
630 True if the path is managed by multipath
634 uuid = get_dm_uuid(dev)
636 (re.match('part\d+-mpath-', uuid) or
637 re.match('mpath-', uuid)))
640 def get_dev_name(path):
642 get device name from path. e.g.::
644 /dev/sda -> sda, /dev/cciss/c0d1 -> cciss!c0d1
646 a device "name" is something like::
652 assert path.startswith('/dev/')
654 return base.replace('/', '!')
def get_dev_path(name):
    """
    Build a path under /dev from a device name.

    Every '!' in the name stands for a '/' in the path, e.g.
    ``cciss!c0d1`` becomes ``/dev/cciss/c0d1``.

    :param name: device name
    :return: the corresponding /dev/... path
    """
    components = name.split('!')
    return '/dev/' + '/'.join(components)
def get_dev_relpath(name):
    """
    Build a path relative to /dev from a device name.

    Every '!' in the name stands for a '/' in the path, e.g.
    ``cciss!c0d1`` becomes ``cciss/c0d1``.

    :param name: device name
    :return: the path relative to /dev
    """
    components = name.split('!')
    return '/'.join(components)
676 def get_dev_size(dev, size='megabytes'):
678 Attempt to get the size of a device so that we can prevent errors
679 from actions to devices that are smaller, and improve error reporting.
681 Because we want to avoid breakage in case this approach is not robust, we
682 will issue a warning if we failed to get the size.
684 :param size: bytes or megabytes
685 :param dev: the device to calculate the size
687 fd = os.open(dev, os.O_RDONLY)
688 dividers = {'bytes': 1, 'megabytes': 1024 * 1024}
690 device_size = os.lseek(fd, 0, os.SEEK_END)
691 divider = dividers.get(size, 1024 * 1024) # default to megabytes
692 return device_size // divider
693 except Exception as error:
694 LOG.warning('failed to get size of %s: %s' % (dev, str(error)))
699 def stmode_is_diskdevice(dmode):
700 if stat.S_ISBLK(dmode):
703 # FreeBSD does not have block devices
704 # All disks are character devices
705 return FREEBSD and stat.S_ISCHR(dmode)
def dev_is_diskdevice(dev):
    """
    Classify ``dev`` by the mode that ``os.stat`` reports for it.

    :param dev: filesystem path to inspect
    :return: whatever ``stmode_is_diskdevice`` returns for that mode
    """
    return stmode_is_diskdevice(os.stat(dev).st_mode)
def ldev_is_diskdevice(dev):
    """
    Classify ``dev`` by the mode that ``os.lstat`` reports for it, i.e.
    without following a final symbolic link.

    :param dev: filesystem path to inspect
    :return: whatever ``stmode_is_diskdevice`` returns for that mode
    """
    return stmode_is_diskdevice(os.lstat(dev).st_mode)
def path_is_diskdevice(path):
    """
    Resolve ``path`` with ``os.path.realpath`` and classify the result
    through ``dev_is_diskdevice``.

    :param path: filesystem path, possibly a symlink
    :return: whatever ``dev_is_diskdevice`` returns for the resolved path
    """
    return dev_is_diskdevice(os.path.realpath(path))
723 def get_partition_mpath(dev, pnum):
724 part_re = "part{pnum}-mpath-".format(pnum=pnum)
725 partitions = list_partitions_mpath(dev, part_re)
732 def retry(on_error=Exception, max_tries=10, wait=0.2, backoff=0):
734 @functools.wraps(func)
735 def repeat(*args, **kwargs):
736 for tries in range(max_tries - 1):
738 return func(*args, **kwargs)
740 time.sleep(wait + backoff * tries)
741 return func(*args, **kwargs)
747 def get_partition_dev(dev, pnum):
749 get the device name for a partition
751 assume that partitions are named like the base dev,
752 with a number, and optionally
753 some intervening characters (like 'p'). e.g.,
756 cciss/c0d1 1 -> cciss!c0d1p1
761 partname = get_partition_mpath(dev, pnum)
763 name = get_dev_name(os.path.realpath(dev))
764 sys_entry = os.path.join(BLOCKDIR, name)
765 error_msg = " in %s" % sys_entry
766 for f in os.listdir(sys_entry):
767 if f.startswith(name) and f.endswith(str(pnum)):
768 # we want the shortest name that starts with the base name
769 # and ends with the partition number
770 if not partname or len(f) < len(partname):
773 return get_dev_path(partname)
775 raise Error('partition %d for %s does not appear to exist%s' %
776 (pnum, dev, error_msg))
779 def list_all_partitions():
781 Return a list of devices and partitions
784 names = os.listdir(BLOCKDIR)
787 # /dev/fd0 may hang http://tracker.ceph.com/issues/6827
788 if re.match(r'^fd\d$', name):
790 dev_part_list[name] = list_partitions(get_dev_path(name))
792 with open(os.path.join(PROCDIR, "partitions")) as partitions:
793 for line in partitions:
794 columns = line.split()
795 if len(columns) >= 4:
797 dev_part_list[name] = list_partitions(get_dev_path(name))
801 def list_partitions(dev):
802 dev = os.path.realpath(dev)
804 return list_partitions_mpath(dev)
806 return list_partitions_device(dev)
809 def list_partitions_mpath(dev, part_re="part\d+-mpath-"):
812 holders = os.path.join(p, 'holders')
813 for holder in os.listdir(holders):
814 uuid_path = os.path.join(holders, holder, 'dm', 'uuid')
815 uuid = open(uuid_path, 'r').read()
816 LOG.debug("list_partitions_mpath: " + uuid_path + " uuid = " + uuid)
817 if re.match(part_re, uuid):
818 partitions.append(holder)
822 def list_partitions_device(dev):
824 Return a list of partitions on the given device name
827 basename = get_dev_name(dev)
828 for name in os.listdir(block_path(dev)):
829 if name.startswith(basename):
830 partitions.append(name)
834 def get_partition_base(dev):
836 Get the base device for a partition
838 dev = os.path.realpath(dev)
839 if not ldev_is_diskdevice(dev):
840 raise Error('not a block device', dev)
842 name = get_dev_name(dev)
843 if os.path.exists(os.path.join('/sys/block', name)):
844 raise Error('not a partition', dev)
847 for basename in os.listdir('/sys/block'):
848 if os.path.exists(os.path.join('/sys/block', basename, name)):
849 return get_dev_path(basename)
850 raise Error('no parent device for partition', dev)
def is_partition_mpath(dev):
    """
    Tell whether the device-mapper uuid of ``dev`` marks it as a
    partition of a multipath device.

    :param dev: device path handed to ``get_dm_uuid``
    :return: True if the uuid starts with ``part<N>-mpath-``, else False
    """
    uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return bool(re.match(r'part\d+-mpath-', uuid))
def partnum_mpath(dev):
    """
    Extract the partition number from the device-mapper uuid of ``dev``.

    :param dev: device path handed to ``get_dm_uuid``
    :return: the digits of the first ``part<N>-mpath-`` occurrence in the
             uuid, as a string
    :raises: IndexError if the uuid contains no such occurrence
    """
    uuid = get_dm_uuid(dev)
    # raw string: '\d' in a plain literal is an invalid escape sequence
    return re.findall(r'part(\d+)-mpath-', uuid)[0]
863 def get_partition_base_mpath(dev):
864 slave_path = os.path.join(block_path(dev), 'slaves')
865 slaves = os.listdir(slave_path)
867 name_path = os.path.join(slave_path, slaves[0], 'dm', 'name')
868 name = open(name_path, 'r').read().strip()
869 return os.path.join('/dev/mapper', name)
872 def is_partition(dev):
874 Check whether a given device path is a partition or a full disk.
877 return is_partition_mpath(dev)
879 dev = os.path.realpath(dev)
881 if not stmode_is_diskdevice(st.st_mode):
882 raise Error('not a block device', dev)
884 name = get_dev_name(dev)
885 if os.path.exists(os.path.join(BLOCKDIR, name)):
888 # make sure it is a partition of something else
889 major = os.major(st.st_rdev)
890 minor = os.minor(st.st_rdev)
891 if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)):
894 raise Error('not a disk or partition', dev)
899 Check if the given device is mounted.
901 dev = os.path.realpath(dev)
902 with open(PROCDIR + '/mounts', 'rb') as proc_mounts:
903 for line in proc_mounts:
904 fields = line.split()
907 mounts_dev = fields[0]
909 if os.path.isabs(mounts_dev) and os.path.exists(mounts_dev):
910 mounts_dev = os.path.realpath(mounts_dev)
911 if mounts_dev == dev:
912 return _bytes2str(path)
918 Check if a device is held by another device (e.g., a dm-crypt mapping)
920 assert os.path.exists(dev)
924 dev = os.path.realpath(dev)
925 base = get_dev_name(dev)
928 directory = '/sys/block/{base}/holders'.format(base=base)
929 if os.path.exists(directory):
930 return os.listdir(directory)
935 directory = '/sys/block/{base}/{part}/holders'.format(
936 part=part, base=base)
937 if os.path.exists(directory):
938 return os.listdir(directory)
943 def verify_not_in_use(dev, check_partitions=False):
945 Verify if a given device (path) is in use (e.g. mounted or
946 in use by device-mapper).
948 :raises: Error if device is in use.
950 assert os.path.exists(dev)
952 raise Error('Device is mounted', dev)
953 holders = is_held(dev)
955 raise Error('Device %s is in use by a device-mapper '
956 'mapping (dm-crypt?)' % dev, ','.join(holders))
958 if check_partitions and not is_partition(dev):
959 for partname in list_partitions(dev):
960 partition = get_dev_path(partname)
961 if is_mounted(partition):
962 raise Error('Device is mounted', partition)
963 holders = is_held(partition)
965 raise Error('Device %s is in use by a device-mapper '
966 'mapping (dm-crypt?)'
967 % partition, ','.join(holders))
970 def must_be_one_line(line):
972 Checks if given line is really one single line.
974 :raises: TruncatedLineError or TooManyLinesError
975 :return: Content of the line, or None if line isn't valid.
977 line = _bytes2str(line)
979 if line[-1:] != '\n':
980 raise TruncatedLineError(line)
983 raise TooManyLinesError(line)
987 def read_one_line(parent, name):
989 Read a file whose sole contents are a single line.
993 :return: Contents of the line, or None if file did not exist.
995 path = os.path.join(parent, name)
997 line = open(path, 'rb').read()
999 if e.errno == errno.ENOENT:
1005 line = must_be_one_line(line)
1006 except (TruncatedLineError, TooManyLinesError) as e:
1008 'File is corrupt: {path}: {msg}'.format(
def write_one_line(parent, name, text):
    """
    Write a file whose sole contents are a single line.

    The text is UTF-8 encoded and a newline is appended. The data goes to
    a per-process temporary file next to the destination, which gets its
    context set by ``path_set_context`` and is then renamed over the
    destination.

    :param parent: directory that will contain the file
    :param name: file name inside ``parent``
    :param text: the line to write, without the trailing newline
    """
    path = os.path.join(parent, name)
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    with open(tmp, 'wb') as tmp_file:
        tmp_file.write(text.encode('utf-8') + b'\n')
        # fsync() only covers data the OS has already received, so the
        # file object's own buffer must be flushed first
        tmp_file.flush()
        os.fsync(tmp_file.fileno())
    path_set_context(tmp)
    os.rename(tmp, path)
1033 Get a init system using 'ceph-detect-init'
1035 init = _check_output(
1038 '--default', 'sysvinit',
1041 init = must_be_one_line(init)
1045 def check_osd_magic(path):
1047 Check that this path has the Ceph OSD magic.
1049 :raises: BadMagicError if this does not look like a Ceph OSD data
1052 magic = read_one_line(path, 'magic')
1054 # probably not mkfs'ed yet
1055 raise BadMagicError(path)
1056 if magic != CEPH_OSD_ONDISK_MAGIC:
1057 raise BadMagicError(path)
def check_osd_id(osd_id):
    """
    Ensures osd id is numeric.

    :param osd_id: the id to validate, as a string
    :raises: Error if ``osd_id`` is anything other than one or more
             ASCII digits
    """
    # \Z instead of $: '$' also matches just before a trailing newline,
    # which would let an id such as '12\n' through
    if not re.match(r'^[0-9]+\Z', osd_id):
        raise Error('osd id is not numeric', osd_id)
1068 def allocate_osd_id(
1075 Allocates an OSD id on the given cluster.
1077 :raises: Error if the call to allocate the OSD id fails.
1078 :return: The allocated OSD id.
1080 lockbox_path = os.path.join(STATEDIR, 'osd-lockbox', fsid)
1081 lockbox_osd_id = read_one_line(lockbox_path, 'whoami')
1082 osd_keyring = os.path.join(path, 'keyring')
1084 LOG.debug('Getting OSD id from Lockbox...')
1085 osd_id = lockbox_osd_id
1086 shutil.move(os.path.join(lockbox_path, 'osd_keyring'),
1088 path_set_context(osd_keyring)
1089 os.unlink(os.path.join(lockbox_path, 'whoami'))
1092 LOG.debug('Allocating OSD id...')
1095 wanttobe = read_one_line(path, 'wanttobe')
1096 if os.path.exists(os.path.join(path, 'wanttobe')):
1097 os.unlink(os.path.join(path, 'wanttobe'))
1098 id_arg = wanttobe and [wanttobe] or []
1099 osd_id = command_with_stdin(
1102 '--cluster', cluster,
1103 '--name', 'client.bootstrap-osd',
1104 '--keyring', keyring,
1111 except subprocess.CalledProcessError as e:
1112 raise Error('ceph osd create failed', e, e.output)
1113 osd_id = must_be_one_line(osd_id)
1114 check_osd_id(osd_id)
1115 secrets.write_osd_keyring(osd_keyring, osd_id)
1119 def get_osd_id(path):
1121 Gets the OSD id of the OSD at the given path.
1123 osd_id = read_one_line(path, 'whoami')
1124 if osd_id is not None:
1125 check_osd_id(osd_id)
1129 def get_ceph_user():
1130 global CEPH_PREF_USER
1132 if CEPH_PREF_USER is not None:
1134 pwd.getpwnam(CEPH_PREF_USER)
1135 return CEPH_PREF_USER
1137 print("No such user:", CEPH_PREF_USER)
1141 pwd.getpwnam('ceph')
1147 def get_ceph_group():
1148 global CEPH_PREF_GROUP
1150 if CEPH_PREF_GROUP is not None:
1152 grp.getgrnam(CEPH_PREF_GROUP)
1153 return CEPH_PREF_GROUP
1155 print("No such group:", CEPH_PREF_GROUP)
1159 grp.getgrnam('ceph')
def path_set_context(path):
    """
    Apply the default SELinux context and ceph ownership to ``path``.

    Both steps are recursive and conditional: ``restorecon -R`` runs only
    when ``which`` finds the executable, and ``chown -R ceph:ceph`` runs
    only when ``get_ceph_user()`` reports 'ceph'. The outcome of either
    command is not inspected.

    :param path: file or directory to adjust
    """
    # restore selinux context to default policy values
    restorecon = which('restorecon')
    if restorecon:
        command(['restorecon', '-R', path])

    # if ceph user exists, set owner to ceph
    owner = get_ceph_user()
    if owner == 'ceph':
        command(['chown', '-R', 'ceph:ceph', path])
1175 def _check_output(args=None, **kwargs):
1176 out, err, ret = command(args, **kwargs)
1179 error = subprocess.CalledProcessError(ret, cmd)
1180 error.output = out + err
1182 return _bytes2str(out)
1185 def get_conf(cluster, variable):
1187 Get the value of the given configuration variable from the
1190 :raises: Error if call to ceph-conf fails.
1191 :return: The variable value or None.
1194 out, err, ret = command(
1197 '--cluster={cluster}'.format(
1206 except OSError as e:
1207 raise Error('error executing ceph-conf', e, err)
1209 # config entry not found
1212 raise Error('getting variable from configuration failed')
1213 value = out.split('\n', 1)[0]
1214 # don't differentiate between "var=" and no var set
1220 def get_conf_with_default(cluster, variable):
1222 Get a config value that is known to the C++ code.
1224 This will fail if called on variables that are not defined in
1225 common config options.
1228 out = _check_output(
1231 '--cluster={cluster}'.format(
1234 '--show-config-value={variable}'.format(
1240 except subprocess.CalledProcessError as e:
1242 'getting variable from configuration failed',
1246 value = str(out).split('\n', 1)[0]
1250 def get_fsid(cluster):
1252 Get the fsid of the cluster.
1254 :return: The fsid or raises Error.
1256 fsid = get_conf_with_default(cluster=cluster, variable='fsid')
1258 raise Error('getting cluster uuid from configuration failed')
1262 def get_dmcrypt_key_path(
1268 Get path to dmcrypt key file.
1270 :return: Path to the dmcrypt key file, callers should check for existence.
1273 path = os.path.join(key_dir, _uuid + ".luks.key")
1275 path = os.path.join(key_dir, _uuid)
1280 def get_dmcrypt_key(
1285 legacy_path = get_dmcrypt_key_path(_uuid, key_dir, luks)
1286 if os.path.exists(legacy_path):
1287 return (legacy_path,)
1288 path = os.path.join(STATEDIR, 'osd-lockbox', _uuid)
1289 if os.path.exists(path):
1290 mode = get_oneliner(path, 'key-management-mode')
1291 osd_uuid = get_oneliner(path, 'osd-uuid')
1292 ceph_fsid = read_one_line(path, 'ceph_fsid')
1293 if ceph_fsid is None:
1294 LOG.warning("no `ceph_fsid` found falling back to 'ceph' "
1298 cluster = find_cluster_by_uuid(ceph_fsid)
1300 raise Error('No cluster conf found in ' + SYSCONFDIR +
1301 ' with fsid %s' % ceph_fsid)
1303 if mode == KEY_MANAGEMENT_MODE_V1:
1304 key, stderr, ret = command(
1307 '--cluster', cluster,
1309 'client.osd-lockbox.' + osd_uuid,
1311 os.path.join(path, 'keyring'),
1314 'dm-crypt/osd/' + osd_uuid + '/luks',
1317 LOG.debug("stderr " + stderr)
1319 return base64.b64decode(key)
1321 raise Error('unknown key-management-mode ' + str(mode))
1322 raise Error('unable to read dm-crypt key', path, legacy_path)
1329 cryptsetup_parameters,
1333 dev = dmcrypt_is_mapped(_uuid)
1337 if isinstance(key, tuple):
1338 # legacy, before lockbox
1339 assert os.path.exists(key[0])
1344 dev = '/dev/mapper/' + _uuid
1352 ] + cryptsetup_parameters
1370 ] + cryptsetup_parameters
1375 command_with_stdin(luksFormat_args, key)
1376 command_with_stdin(luksOpen_args, key)
1378 # Plain mode has no format function, nor any validation
1379 # that the key is correct.
1380 command_with_stdin(create_args, key)
1381 # set proper ownership of mapped device
1382 command_check_call(['chown', 'ceph:ceph', dev])
1385 except subprocess.CalledProcessError as e:
1386 raise Error('unable to map device', rawdev, e)
1389 @retry(Error, max_tries=10, wait=0.5, backoff=1.0)
1390 def dmcrypt_unmap(_uuid):
1391 if not os.path.exists('/dev/mapper/' + _uuid):
1394 command_check_call(['cryptsetup', 'remove', _uuid])
1395 except subprocess.CalledProcessError as e:
1396 raise Error('unable to unmap device', _uuid, e)
Mounts a device with given filesystem type and
1406 mount options to a tempfile path under /var/lib/ceph/tmp.
1408 # sanity check: none of the arguments are None
1410 raise ValueError('dev may not be None')
1412 raise ValueError('fstype may not be None')
1414 # pick best-of-breed mount options based on fs type
1416 options = MOUNT_OPTIONS.get(fstype, '')
1418 myTemp = STATEDIR + '/tmp'
1419 # mkdtemp expect 'dir' to be existing on the system
1420 # Let's be sure it's always the case
1421 if not os.path.exists(myTemp):
1425 path = tempfile.mkdtemp(
1430 LOG.debug('Mounting %s on %s with options %s', dev, path, options)
1441 if which('restorecon'):
1448 except subprocess.CalledProcessError as e:
1451 except (OSError, IOError):
1458 @retry(UnmountError, max_tries=3, wait=0.5, backoff=1.0)
1464 Unmount and removes the given mount point.
1467 LOG.debug('Unmounting %s', path)
1475 except subprocess.CalledProcessError as e:
1476 raise UnmountError(e)
1482 ###########################################
def extract_parted_partition_numbers(partitions):
    """
    Extract the numbers that start a line in the given text.

    A list is always returned, so it can be tested for emptiness and
    passed to ``max()``; on Python 3 a bare ``map()`` would be an
    always-true iterator.

    :param partitions: multi-line text, one partition per line
    :return: list of the integers found at the beginning of lines,
             in order of appearance (empty if there are none)
    """
    return [int(number)
            for number in re.findall(r'^\d+', partitions, re.MULTILINE)]
1489 def get_free_partition_index(dev):
1491 Get the next free partition index on a given device.
1493 :return: Index number (> 1 if there is already a partition on the device)
1494 or 1 if there is no partition table.
1497 lines = _check_output(
1506 except subprocess.CalledProcessError as e:
1507 LOG.info('cannot read partition index; assume it '
1508 'isn\'t present\n (Error: %s)' % e)
1512 raise Error('parted failed to output anything')
1513 LOG.debug('get_free_partition_index: analyzing ' + lines)
1514 if ('CHS;' not in lines and
1515 'CYL;' not in lines and
1516 'BYT;' not in lines):
1517 raise Error('parted output expected to contain one of ' +
1518 'CHH; CYL; or BYT; : ' + lines)
1519 if os.path.realpath(dev) not in lines:
1520 raise Error('parted output expected to contain ' + dev + ': ' + lines)
1521 _, partitions = lines.split(os.path.realpath(dev))
1522 partition_numbers = extract_parted_partition_numbers(partitions)
1523 if partition_numbers:
1524 return max(partition_numbers) + 1
1529 def check_journal_reqs(args):
1530 _, _, allows_journal = command([
1531 'ceph-osd', '--check-allows-journal',
1533 '--log-file', '$run_dir/$cluster-osd-check.log',
1534 '--cluster', args.cluster,
1535 '--setuser', get_ceph_user(),
1536 '--setgroup', get_ceph_group(),
1538 _, _, wants_journal = command([
1539 'ceph-osd', '--check-wants-journal',
1541 '--log-file', '$run_dir/$cluster-osd-check.log',
1542 '--cluster', args.cluster,
1543 '--setuser', get_ceph_user(),
1544 '--setgroup', get_ceph_group(),
1546 _, _, needs_journal = command([
1547 'ceph-osd', '--check-needs-journal',
1549 '--log-file', '$run_dir/$cluster-osd-check.log',
1550 '--cluster', args.cluster,
1551 '--setuser', get_ceph_user(),
1552 '--setgroup', get_ceph_group(),
1554 return (not allows_journal, not wants_journal, not needs_journal)
1557 def update_partition(dev, description):
1559 Must be called after modifying a partition table so the kernel
knows about the change and fires udev events accordingly. A side
1561 effect of partprobe is to remove partitions and add them again.
1562 The first udevadm settle waits for ongoing udev events to
1563 complete, just in case one of them rely on an existing partition
1564 on dev. The second udevadm settle guarantees to the caller that
1565 all udev events related to the partition table change have been
1566 processed, i.e. the 95-ceph-osd.rules actions and mode changes,
1567 group changes etc. are complete.
1569 LOG.debug('Calling partprobe on %s device %s', description, dev)
1570 partprobe_ok = False
1571 error = 'unknown error'
1572 partprobe = _get_command_executable(['partprobe'])[0]
1574 command_check_call(['udevadm', 'settle', '--timeout=600'])
1576 _check_output(['flock', '-s', dev, partprobe, dev])
1579 except subprocess.CalledProcessError as e:
1581 if ('unable to inform the kernel' not in error and
1582 'Device or resource busy' not in error):
1584 LOG.debug('partprobe %s failed : %s (ignored, waiting 60s)'
1587 if not partprobe_ok:
1588 raise Error('partprobe %s failed : %s' % (dev, error))
1589 command_check_call(['udevadm', 'settle', '--timeout=600'])
1594 # Thoroughly wipe all partitions of any traces of
1595 # Filesystems or OSD Journals
1597 # In addition we need to write 10M of data to each partition
1598 # to make sure that after re-creating the same partition
1599 # there is no trace left of any previous Filesystem or OSD
1602 LOG.debug('Writing zeros to existing partitions on %s', dev)
1604 for partname in list_partitions(dev):
1605 partition = get_dev_path(partname)
1618 'of={path}'.format(path=partition),
1624 LOG.debug('Zapping partition table on %s', dev)
1626 # try to wipe out any GPT partition table backups. sgdisk
1627 # isn't too thorough.
1629 size = 33 * lba_size
1630 with open(dev, 'wb') as dev_file:
1631 dev_file.seek(-size, os.SEEK_END)
1632 dev_file.write(size * b'\0')
1651 update_partition(dev, 'zapped')
1653 except subprocess.CalledProcessError as e:
1657 def zap_freebsd(dev):
1659 # For FreeBSD we just need to zap the partition.
1669 except subprocess.CalledProcessError as e:
1675 Destroy the partition table and content of a given disk.
1677 dev = os.path.realpath(dev)
1678 dmode = os.stat(dev).st_mode
1679 if not stat.S_ISBLK(dmode) or is_partition(dev):
1680 raise Error('not full block device; cannot zap', dev)
def adjust_symlink(target, path):
    """
    Make path a symlink pointing to target.

    A regular file found at path is removed. A symlink found at path is
    removed unless it already points to target, in which case nothing
    is done. Anything else found at path is left alone and the symlink
    creation is attempted anyway.

    :param target: what the symlink must point to
    :param path: location of the symlink
    :raises Error: if the old entry cannot be inspected or removed, or
                   if the new symlink cannot be created
    """
    create = True
    if os.path.lexists(path):
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted into an Error
        try:
            mode = os.lstat(path).st_mode
            if stat.S_ISREG(mode):
                LOG.debug('Removing old file %s', path)
                os.unlink(path)
            elif stat.S_ISLNK(mode):
                old = os.readlink(path)
                if old != target:
                    LOG.debug('Removing old symlink %s -> %s', path, old)
                    os.unlink(path)
                else:
                    # already what we want, keep it
                    create = False
        except Exception:
            raise Error('unable to remove (or adjust) old file (symlink)',
                        path)
    if create:
        LOG.debug('Creating symlink %s -> %s', path, target)
        try:
            os.symlink(target, path)
        except Exception:
            raise Error('unable to create symlink %s -> %s' % (path, target))
def get_mount_options(cluster, fs_type):
    """
    Look up the mount options configured for fs_type.

    ``osd_mount_options_<fs_type>`` is read first and, when set,
    returned with all whitespace removed. Otherwise the value of
    ``osd_fs_mount_options_<fs_type>`` is returned exactly as get_conf
    provides it.

    :param cluster: cluster name handed to get_conf
    :param fs_type: file system type, e.g. "xfs"
    :return: whatever the lookups yield (the fallback is passed through
             untouched)
    """
    preferred = get_conf(
        cluster,
        variable='osd_mount_options_{fstype}'.format(fstype=fs_type),
    )
    if preferred is not None:
        # remove whitespaces
        return "".join(preferred.split())

    return get_conf(
        cluster,
        variable='osd_fs_mount_options_{fstype}'.format(fstype=fs_type),
    )
class Device(object):
    """
    A whole block device (never a partition) on which partitions are
    created with sgdisk and looked up by number.
    """

    def __init__(self, path, args):
        """
        :param path: path of the device; asserted not to be a partition
        :param args: parsed arguments, passed on to DevicePartition.factory
        """
        self.args = args
        self.path = path
        self.dev_size = None   # filled lazily by get_dev_size()
        self.partitions = {}   # partition number -> partition object
        self.ptype_map = None  # filled lazily by ptype_tobe_for_name()
        assert not is_partition(self.path)

    def create_partition(self, uuid, name, size=0, num=0):
        """
        Create a partition named "ceph <name>" on the device.

        :param uuid: partition GUID to set
        :param name: partition role, used for the label and type code
        :param size: size in MB; 0 or less uses the largest free area
        :param num: partition number; 0 picks the next free index
        :return: the partition number that was used
        :raises Error: if size is bigger than the device size
        """
        ptype = self.ptype_tobe_for_name(name)
        if num == 0:
            num = get_free_partition_index(dev=self.path)

        if size > 0:
            if size > self.get_dev_size():
                LOG.error('refusing to create %s on %s' % (name, self.path))
                LOG.error('%s size (%sM) is bigger than device (%sM)'
                          % (name, size, self.get_dev_size()))
                raise Error('%s device size (%sM) is not big enough for %s'
                            % (self.path, self.get_dev_size(), name))
            new = '--new={num}:0:+{size}M'.format(num=num, size=size)
        else:
            new = '--largest-new={num}'.format(num=num)

        LOG.debug('Creating %s partition num %d size %d on %s',
                  name, num, size, self.path)
        sgdisk = [
            'sgdisk',
            new,
            '--change-name={num}:ceph {name}'.format(num=num, name=name),
            '--partition-guid={num}:{uuid}'.format(num=num, uuid=uuid),
            '--typecode={num}:{uuid}'.format(num=num, uuid=ptype),
            '--mbrtogpt',
            '--',
            self.path,
        ]
        command_check_call(sgdisk, exit=True)
        update_partition(self.path, 'created')
        return num

    def ptype_tobe_for_name(self, name):
        """
        Return the "tobe" partition type UUID to use for name.

        'data' is looked up as 'osd'. 'lockbox' is always taken from the
        mpath or regular map, depending on the device only.
        """
        LOG.debug("name = " + name)
        if name == 'data':
            name = 'osd'
        if name == 'lockbox':
            flavor = 'mpath' if is_mpath(self.path) else 'regular'
            return PTYPE[flavor]['lockbox']['tobe']
        if self.ptype_map is None:
            # a partition object is built only to learn which map applies
            probe = DevicePartition.factory(
                path=self.path, dev=None, args=self.args)
            self.ptype_map = probe.ptype_map
        return self.ptype_map[name]['tobe']

    def get_partition(self, num):
        """
        Return the partition object for partition number num, building
        and caching it on first use.
        """
        if num in self.partitions:
            return self.partitions[num]
        dev = get_partition_dev(self.path, num)
        partition = DevicePartition.factory(
            path=self.path, dev=dev, args=self.args)
        partition.set_partition_number(num)
        self.partitions[num] = partition
        return partition

    def get_dev_size(self):
        """
        Return the device size as reported by the module level
        get_dev_size(), querying it only once.
        """
        if self.dev_size is None:
            self.dev_size = get_dev_size(self.path)
        return self.dev_size

    @staticmethod
    def factory(path, args):
        """
        Build a Device for path.
        """
        return Device(path, args)
1810 class DevicePartition(object):
1812 def __init__(self, args):
1818 self.ptype_map = None
1820 self.set_variables_ptype()
1823 if self.uuid is None:
1824 self.uuid = get_partition_uuid(self.rawdev)
1827 def get_ptype(self):
1828 if self.ptype is None:
1829 self.ptype = get_partition_type(self.rawdev)
1832 def set_partition_number(self, num):
1835 def get_partition_number(self):
1838 def set_dev(self, dev):
1845 def get_rawdev(self):
1848 def set_variables_ptype(self):
1849 self.ptype_map = PTYPE['regular']
1851 def ptype_for_name(self, name):
1852 return self.ptype_map[name]['ready']
1856 def factory(path, dev, args):
1857 dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
1858 if ((path is not None and is_mpath(path)) or
1859 (dev is not None and is_mpath(dev))):
1860 partition = DevicePartitionMultipath(args)
1861 elif dmcrypt_type == 'luks':
1862 partition = DevicePartitionCryptLuks(args)
1863 elif dmcrypt_type == 'plain':
1864 partition = DevicePartitionCryptPlain(args)
1866 partition = DevicePartition(args)
1867 partition.set_dev(dev)
class DevicePartitionMultipath(DevicePartition):
    """
    Partition that takes its partition type UUIDs from PTYPE['mpath'];
    DevicePartition.factory builds it when the path or the device is
    reported as multipath by is_mpath().
    """

    def set_variables_ptype(self):
        # use the multipath flavour of the partition type UUIDs
        self.ptype_map = PTYPE['mpath']
1877 class DevicePartitionCrypt(DevicePartition):
1879 def __init__(self, args):
1880 super(DevicePartitionCrypt, self).__init__(args)
1881 self.osd_dm_key = None
1882 self.cryptsetup_parameters = CryptHelpers.get_cryptsetup_parameters(
1884 self.dmcrypt_type = CryptHelpers.get_dmcrypt_type(self.args)
1885 self.dmcrypt_keysize = CryptHelpers.get_dmcrypt_keysize(self.args)
1887 def setup_crypt(self):
1892 self.dev = _dmcrypt_map(
1894 key=self.osd_dm_key,
1895 _uuid=self.get_uuid(),
1896 cryptsetup_parameters=self.cryptsetup_parameters,
1903 dmcrypt_unmap(self.get_uuid())
1904 self.dev = self.rawdev
class DevicePartitionCryptPlain(DevicePartitionCrypt):
    """
    dm-crypt partition in plain mode, using the PTYPE['plain']
    partition type UUIDs.
    """

    def luks(self):
        """
        Plain mode is not LUKS.
        """
        return False

    def setup_crypt(self):
        """
        Add the key size to the cryptsetup parameters and fetch the
        dm-crypt key. Does nothing once a key has been obtained.
        """
        if self.osd_dm_key is None:
            self.cryptsetup_parameters += ['--key-size',
                                           str(self.dmcrypt_keysize)]
            partition_uuid = self.get_uuid()
            self.osd_dm_key = get_dmcrypt_key(
                partition_uuid, self.args.dmcrypt_key_dir, False)

    def set_variables_ptype(self):
        """
        Select the partition type UUIDs of plain dm-crypt partitions.
        """
        self.ptype_map = PTYPE['plain']
class DevicePartitionCryptLuks(DevicePartitionCrypt):
    """
    dm-crypt partition in LUKS mode, using the PTYPE['luks'] partition
    type UUIDs.
    """

    def luks(self):
        """
        This partition flavour is LUKS.
        """
        return True

    def setup_crypt(self):
        """
        Prepare the cryptsetup parameters and fetch the dm-crypt key.
        Does nothing once a key has been obtained.
        """
        if self.osd_dm_key is None:
            # A key size of 1024 is not forced into the
            # cryptsetup_parameters, as we want the cryptsetup defaults
            # to prevail for the actual LUKS key lengths.
            if self.dmcrypt_keysize != 1024:
                self.cryptsetup_parameters += ['--key-size',
                                               str(self.dmcrypt_keysize)]
            partition_uuid = self.get_uuid()
            self.osd_dm_key = get_dmcrypt_key(
                partition_uuid, self.args.dmcrypt_key_dir, True)

    def set_variables_ptype(self):
        """
        Select the partition type UUIDs of LUKS partitions.
        """
        self.ptype_map = PTYPE['luks']
1956 class Prepare(object):
1958 def __init__(self, args):
1963 parser = argparse.ArgumentParser(add_help=False)
1964 parser.add_argument(
1968 help='cluster name to assign this disk to',
1970 parser.add_argument(
1973 help='cluster uuid to assign this disk to',
1975 parser.add_argument(
1978 help='unique OSD uuid to assign this disk to',
1980 parser.add_argument(
1983 help='unique OSD id to assign this disk to',
1985 parser.add_argument(
1986 '--crush-device-class',
1987 help='crush device class to assign this disk to',
1989 parser.add_argument(
1991 action='store_true', default=None,
1992 help='encrypt DATA and/or JOURNAL devices with dm-crypt',
1994 parser.add_argument(
1995 '--dmcrypt-key-dir',
1997 default='/etc/ceph/dmcrypt-keys',
1998 help='directory where dm-crypt keys are stored',
2000 parser.add_argument(
2003 help='bootstrap-osd keyring path template (%(default)s)',
2004 default='{statedir}/bootstrap-osd/{cluster}.keyring',
2005 dest='prepare_key_template',
2007 parser.add_argument(
2009 action='store_true', default=None,
2010 help='let many prepare\'s run in parallel',
2015 def set_subparser(subparsers):
2018 PrepareData.parser(),
2021 parents.extend(PrepareFilestore.parent_parsers())
2022 parents.extend(PrepareBluestore.parent_parsers())
2023 parser = subparsers.add_parser(
2026 formatter_class=argparse.RawDescriptionHelpFormatter,
2027 description=textwrap.fill(textwrap.dedent("""\
2028 If the --bluestore argument is given, a bluestore objectstore
2029 will be created. If --filestore is provided, a legacy FileStore
2030 objectstore will be created. If neither is specified, we default
2033 When an entire device is prepared for bluestore, two
2034 partitions are created. The first partition is for metadata,
2035 the second partition is for blocks that contain data.
2037 Unless explicitly specified with --block.db or
2038 --block.wal, the bluestore DB and WAL data is stored on
2039 the main block device. For instance:
2041 ceph-disk prepare --bluestore /dev/sdc
2045 /dev/sdc1 for osd metadata
2046 /dev/sdc2 for block, db, and wal data (the rest of the disk)
2049 If either --block.db or --block.wal are specified to be
2050 the same whole device, they will be created as partition
2051 three and four respectively. For instance:
2053 ceph-disk prepare --bluestore \\
2054 --block.db /dev/sdc \\
2055 --block.wal /dev/sdc \\
2060 /dev/sdc1 for osd metadata
2061 /dev/sdc2 for block (the rest of the disk)
2066 help='Prepare a directory or disk for a Ceph OSD',
2068 parser.set_defaults(
2074 if self.args.no_locking:
2083 return PrepareBluestore(args)
2085 return PrepareFilestore(args)
2089 Prepare.factory(args).prepare()
class PrepareFilestore(Prepare):
    """
    Prepare an OSD made of a data space and a journal; a lockbox is
    added when dm-crypt is requested.
    """

    def __init__(self, args):
        """
        :param args: parsed arguments shared by all the pieces
        """
        super(PrepareFilestore, self).__init__(args)
        if args.dmcrypt:
            # the lockbox only exists for encrypted OSDs
            self.lockbox = Lockbox(args)
        self.data = PrepareFilestoreData(args)
        self.journal = PrepareJournal(args)

    @staticmethod
    def parent_parsers():
        """
        Return the argument parsers specific to this objectstore.
        """
        journal_parser = PrepareJournal.parser()
        return [journal_parser]

    def _prepare(self):
        """
        Prepare the lockbox first (dm-crypt only), then the data
        together with its journal.
        """
        encrypted = self.data.args.dmcrypt
        if encrypted:
            self.lockbox.prepare()
        self.data.prepare(self.journal)
class PrepareBluestore(Prepare):
    """
    Prepare an OSD made of a data space, a block space and optional
    block.db / block.wal spaces; a lockbox is added when dm-crypt is
    requested.
    """

    def __init__(self, args):
        """
        :param args: parsed arguments shared by all the pieces
        """
        super(PrepareBluestore, self).__init__(args)
        if args.dmcrypt:
            # the lockbox only exists for encrypted OSDs
            self.lockbox = Lockbox(args)
        self.data = PrepareBluestoreData(args)
        self.block = PrepareBluestoreBlock(args)
        self.blockdb = PrepareBluestoreBlockDB(args)
        self.blockwal = PrepareBluestoreBlockWAL(args)

    @staticmethod
    def parser():
        """
        Return a parser for --bluestore / --filestore; both options
        write to the same ``bluestore`` destination, which is True
        unless --filestore is given.
        """
        objectstore = argparse.ArgumentParser(add_help=False)
        objectstore.add_argument(
            '--bluestore',
            dest='bluestore',
            action='store_true', default=True,
            help='bluestore objectstore',
        )
        objectstore.add_argument(
            '--filestore',
            dest='bluestore',
            action='store_false',
            help='filestore objectstore',
        )
        return objectstore

    @staticmethod
    def parent_parsers():
        """
        Return the argument parsers specific to this objectstore.
        """
        providers = (
            PrepareBluestore,
            PrepareBluestoreBlock,
            PrepareBluestoreBlockDB,
            PrepareBluestoreBlockWAL,
        )
        return [provider.parser() for provider in providers]

    def _prepare(self):
        """
        Prepare the lockbox first (dm-crypt only), then the data
        together with block.db and block.wal (when given) and block.
        """
        if self.data.args.dmcrypt:
            self.lockbox.prepare()
        optional = (
            ('block.db', self.blockdb),
            ('block.wal', self.blockwal),
        )
        to_prepare_list = [space for option, space in optional
                           if getattr(self.data.args, option)]
        # block always comes last
        to_prepare_list.append(self.block)
        self.data.prepare(*to_prepare_list)
class Space(object):
    """
    Holder for the names of the spaces an OSD may use besides its data.
    """

    # iterated by the lockbox code to find the <name>_uuid arguments
    # and the <name>-uuid files
    NAMES = ('block', 'journal', 'block.db', 'block.wal')
2167 class PrepareSpace(object):
2173 def __init__(self, args):
2176 self.space_size = self.get_space_size()
2177 if getattr(self.args, self.name + '_uuid') is None:
2178 setattr(self.args, self.name + '_uuid', str(uuid.uuid4()))
2179 self.space_symlink = None
2180 self.space_dmcrypt = None
def set_type(self):
    """
    Decide whether the space is absent, a regular file or a block
    device and store the answer in self.type.

    When the space is wanted, the data is a whole disk and neither a
    path nor --<name>-file was given, the space is colocated with the
    data: the argument is set to args.data.

    - no path: NONE
    - path does not exist: FILE
    - path is a disk device: DEVICE
    - path is a regular file: FILE

    :raises Error: if --<name>-dev or --<name>-file contradicts what
                   was found, or the path is neither a block device
                   nor a regular file
    """
    name = self.name
    args = self.args
    if (self.wants_space() and
            dev_is_diskdevice(args.data) and
            not is_partition(args.data) and
            getattr(args, name) is None and
            getattr(args, name + '_file') is None):
        LOG.info('Will colocate %s with data on %s',
                 name, args.data)
        setattr(args, name, args.data)

    # read only now: colocation may just have set it
    space = getattr(args, name)

    if space is None:
        if getattr(args, name + '_dev'):
            raise Error('%s is unspecified; not a block device' %
                        name.capitalize(), space)
        self.type = self.NONE
        return

    if not os.path.exists(space):
        if getattr(args, name + '_dev'):
            raise Error('%s does not exist; not a block device' %
                        name.capitalize(), space)
        self.type = self.FILE
        return

    mode = os.stat(space).st_mode
    if stmode_is_diskdevice(mode):
        if getattr(args, name + '_file'):
            # capitalize() must be called: formatting the bare method
            # puts a bound-method repr in the message
            raise Error('%s is not a regular file' % name.capitalize(),
                        space)
        self.type = self.DEVICE
        return

    if stat.S_ISREG(mode):
        if getattr(args, name + '_dev'):
            raise Error('%s is not a block device' % name.capitalize(),
                        space)
        self.type = self.FILE
        return

    raise Error('%s %s is neither a block device nor regular file' %
                (name.capitalize(), space))
2227 return self.type == self.NONE
2230 return self.type == self.FILE
2232 def is_device(self):
2233 return self.type == self.DEVICE
def parser(name, positional=True):
    """
    Build the argument parser shared by all spaces.

    It provides --<name>-uuid, --<name>-file and --<name>-dev and,
    unless positional is false, an optional positional argument called
    <name>.

    :param name: name of the space, e.g. 'journal' or 'block.db'
    :param positional: whether to add the positional <name> argument
    :return: an argparse.ArgumentParser created with add_help=False
    """
    space_parser = argparse.ArgumentParser(add_help=False)
    upper_name = name.upper()
    space_parser.add_argument(
        '--%s-uuid' % name,
        metavar='UUID',
        help='unique uuid to assign to the %s' % name,
    )
    space_parser.add_argument(
        '--%s-file' % name,
        action='store_true', default=None,
        help='verify that %s is a file' % upper_name,
    )
    space_parser.add_argument(
        '--%s-dev' % name,
        action='store_true', default=None,
        help='verify that %s is a block device' % upper_name,
    )
    if not positional:
        return space_parser

    space_parser.add_argument(
        name,
        metavar=upper_name,
        nargs='?',
        help=('path to OSD %s disk block device;' % name +
              ' leave out to store %s in file' % name),
    )
    return space_parser
2264 def wants_space(self):
def populate_data_path(self, path):
    """
    Record the space in the data directory path, according to the type
    of the space. Nothing is done when there is no space (NONE).

    :param path: the OSD data directory
    :raises Error: if self.type is none of DEVICE, FILE or NONE
    """
    kind = self.type
    if kind == self.DEVICE:
        self.populate_data_path_device(path)
        return
    if kind == self.FILE:
        self.populate_data_path_file(path)
        return
    if kind == self.NONE:
        return
    raise Error('unexpected type ', self.type)
def populate_data_path_file(self, path):
    """
    Write the uuid of the space to the <name>_uuid file of the data
    directory and make <path>/<name> a symlink to the space.

    Each step is skipped when the corresponding value is None.

    :param path: the OSD data directory
    """
    uuid_key = self.name + '_uuid'
    space_uuid = getattr(self.args, uuid_key)
    if space_uuid is not None:
        write_one_line(path, uuid_key, space_uuid)

    if self.space_symlink is None:
        return
    link = os.path.join(path, self.name)
    adjust_symlink(self.space_symlink, link)
2286 def populate_data_path_device(self, path):
2287 self.populate_data_path_file(path)
2289 if self.space_dmcrypt is not None:
2290 adjust_symlink(self.space_dmcrypt,
2291 os.path.join(path, self.name + '_dmcrypt'))
2294 os.unlink(os.path.join(path, self.name + '_dmcrypt'))
2299 if self.type == self.DEVICE:
2300 self.prepare_device()
2301 elif self.type == self.FILE:
2303 elif self.type == self.NONE:
2306 raise Error('unexpected type ', self.type)
2308 def prepare_file(self):
2309 space_filename = getattr(self.args, self.name)
2310 if not os.path.exists(space_filename):
2311 LOG.debug('Creating %s file %s with size 0'
2312 ' (ceph-osd will resize and allocate)',
2315 space_file = open(space_filename, 'wb')
2317 path_set_context(space_filename)
2319 LOG.debug('%s is file %s',
2320 self.name.capitalize(),
2322 LOG.warning('OSD will not be hot-swappable if %s is '
2323 'not the same device as the osd data' %
2325 self.space_symlink = space_filename
2327 def prepare_device(self):
2328 reusing_partition = False
2330 if is_partition(getattr(self.args, self.name)):
2331 LOG.debug('%s %s is a partition',
2332 self.name.capitalize(), getattr(self.args, self.name))
2333 partition = DevicePartition.factory(
2334 path=None, dev=getattr(self.args, self.name), args=self.args)
2335 if isinstance(partition, DevicePartitionCrypt):
2336 raise Error(getattr(self.args, self.name) +
2337 ' partition already exists'
2338 ' and --dmcrypt specified')
2339 LOG.warning('OSD will not be hot-swappable' +
2340 ' if ' + self.name + ' is not' +
2341 ' the same device as the osd data')
2342 if partition.get_ptype() == partition.ptype_for_name(self.name):
2343 LOG.debug('%s %s was previously prepared with '
2344 'ceph-disk. Reusing it.',
2345 self.name.capitalize(),
2346 getattr(self.args, self.name))
2347 reusing_partition = True
2348 # Read and reuse the partition uuid from this journal's
2349 # previous life. We reuse the uuid instead of changing it
2350 # because udev does not reliably notice changes to an
2351 # existing partition's GUID. See
2352 # http://tracker.ceph.com/issues/10146
2353 setattr(self.args, self.name + '_uuid', partition.get_uuid())
2354 LOG.debug('Reusing %s with uuid %s',
2356 getattr(self.args, self.name + '_uuid'))
2358 LOG.warning('%s %s was not prepared with '
2359 'ceph-disk. Symlinking directly.',
2360 self.name.capitalize(),
2361 getattr(self.args, self.name))
2362 self.space_symlink = getattr(self.args, self.name)
2365 self.space_symlink = '/dev/disk/by-partuuid/{uuid}'.format(
2366 uuid=getattr(self.args, self.name + '_uuid'))
2368 if self.args.dmcrypt:
2369 self.space_dmcrypt = self.space_symlink
2370 self.space_symlink = '/dev/mapper/{uuid}'.format(
2371 uuid=getattr(self.args, self.name + '_uuid'))
2373 if reusing_partition:
2374 # confirm that the space_symlink exists. It should since
2375 # this was an active space
2376 # in the past. Continuing otherwise would be futile.
2377 assert os.path.exists(self.space_symlink)
2380 num = self.desired_partition_number()
2383 LOG.warning('OSD will not be hot-swappable if %s '
2384 'is not the same device as the osd data',
2387 device = Device.factory(getattr(self.args, self.name), self.args)
2388 num = device.create_partition(
2389 uuid=getattr(self.args, self.name + '_uuid'),
2391 size=self.space_size,
2394 partition = device.get_partition(num)
2396 LOG.debug('%s is GPT partition %s',
2397 self.name.capitalize(),
2400 if isinstance(partition, DevicePartitionCrypt):
2407 '--typecode={num}:{uuid}'.format(
2409 uuid=partition.ptype_for_name(self.name),
2412 getattr(self.args, self.name),
2415 update_partition(getattr(self.args, self.name), 'prepared')
2417 LOG.debug('%s is GPT partition %s',
2418 self.name.capitalize(),
class PrepareJournal(PrepareSpace):
    """
    The journal space of a filestore OSD.
    """

    def __init__(self, args):
        """
        :param args: parsed arguments
        :raises Error: if a journal is given although the osd backend
                       does not allow one
        """
        self.name = 'journal'
        requirements = check_journal_reqs(args)
        (self.allows_journal,
         self.wants_journal,
         self.needs_journal) = requirements

        if args.journal and not self.allows_journal:
            raise Error('journal specified but not allowed by osd backend')

        super(PrepareJournal, self).__init__(args)

    def wants_space(self):
        """
        A journal is wanted when ceph-osd says so.
        """
        return self.wants_journal

    def get_space_size(self):
        """
        Return the value of osd_journal_size as an integer.
        """
        configured = get_conf_with_default(
            cluster=self.args.cluster,
            variable='osd_journal_size',
        )
        return int(configured)

    def desired_partition_number(self):
        """
        Return 2 when the journal shares the disk with the osd data,
        0 otherwise.
        """
        # we're sharing the disk between osd data and journal;
        # make journal be partition number 2
        return 2 if self.args.journal == self.args.data else 0

    @staticmethod
    def parser():
        """
        Return the argument parser of the journal space.
        """
        return PrepareSpace.parser('journal')
class PrepareBluestoreBlock(PrepareSpace):
    """
    The block space of a bluestore OSD.
    """

    def __init__(self, args):
        """
        :param args: parsed arguments
        """
        self.name = 'block'
        super(PrepareBluestoreBlock, self).__init__(args)

    def get_space_size(self):
        """
        Return the size of the block partition in MB, derived from
        bluestore_block_size, or 0 when it is not configured.

        :return: an integer number of MB
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_size',
        )

        if block_size is None:
            return 0  # get as much space as possible
        else:
            # floor division: "/" gives a float on Python 3 and the
            # size ends up in the sgdisk --new argument
            return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """
        Return 2 when block shares the disk with the osd data,
        0 otherwise.
        """
        if self.args.block == self.args.data:
            num = 2
        else:
            num = 0
        return num

    @staticmethod
    def parser():
        """
        Return the argument parser of the block space.
        """
        return PrepareSpace.parser('block')
class PrepareBluestoreBlockDB(PrepareSpace):
    """
    The block.db space of a bluestore OSD.
    """

    def __init__(self, args):
        """
        :param args: parsed arguments
        """
        self.name = 'block.db'
        super(PrepareBluestoreBlockDB, self).__init__(args)

    def get_space_size(self):
        """
        Return the size of the block.db partition in MB.

        bluestore_block_db_size is used when set to a non zero value.
        Otherwise the size is one hundredth of bluestore_block_size,
        with a minimum of 1024, or 1024 when that is not set either.

        :return: an integer number of MB
        """
        block_db_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_db_size',
        )

        if block_db_size is None or int(block_db_size) == 0:
            block_size = get_conf(
                cluster=self.args.cluster,
                variable='bluestore_block_size',
            )
            if block_size is None:
                return 1024  # MB
            # floor division: "/" gives a float on Python 3 and the
            # size ends up in the sgdisk --new argument
            size = int(block_size) // 100 // 1048576
            return max(size, 1024)  # MB
        else:
            return int(block_db_size) // 1048576  # MB

    def desired_partition_number(self):
        """
        Return 3 when block.db shares the disk with the osd data,
        0 otherwise.
        """
        if getattr(self.args, 'block.db') == self.args.data:
            num = 3
        else:
            num = 0
        return num

    def wants_space(self):
        """
        block.db is only created when explicitly asked for.
        """
        return False

    @staticmethod
    def parser():
        """
        Return the argument parser of the block.db space, with the
        path given via --block.db instead of a positional argument.
        """
        parser = PrepareSpace.parser('block.db', positional=False)
        parser.add_argument(
            '--block.db',
            metavar='BLOCKDB',
            help='path to the device or file for bluestore block.db',
        )
        return parser
class PrepareBluestoreBlockWAL(PrepareSpace):
    """
    The block.wal space of a bluestore OSD.
    """

    def __init__(self, args):
        """
        :param args: parsed arguments
        """
        self.name = 'block.wal'
        super(PrepareBluestoreBlockWAL, self).__init__(args)

    def get_space_size(self):
        """
        Return the size of the block.wal partition in MB, derived from
        bluestore_block_wal_size, or 576 when it is not configured.

        :return: an integer number of MB
        """
        block_size = get_conf(
            cluster=self.args.cluster,
            variable='bluestore_block_wal_size',
        )

        if block_size is None:
            return 576  # MB, default value
        else:
            # floor division: "/" gives a float on Python 3 and the
            # size ends up in the sgdisk --new argument
            return int(block_size) // 1048576  # MB

    def desired_partition_number(self):
        """
        Return 4 when block.wal shares the disk with the osd data,
        0 otherwise.
        """
        if getattr(self.args, 'block.wal') == self.args.data:
            num = 4
        else:
            num = 0
        return num

    def wants_space(self):
        """
        block.wal is only created when explicitly asked for.
        """
        return False

    @staticmethod
    def parser():
        """
        Return the argument parser of the block.wal space, with the
        path given via --block.wal instead of a positional argument.
        """
        parser = PrepareSpace.parser('block.wal', positional=False)
        parser.add_argument(
            '--block.wal',
            metavar='BLOCKWAL',
            help='path to the device or file for bluestore block.wal',
        )
        return parser
class CryptHelpers(object):
    """
    Static helpers reading the dm-crypt related settings.
    """

    @staticmethod
    def get_cryptsetup_parameters(args):
        """
        Return osd_cryptsetup_parameters split shell-style into a list,
        or an empty list when it is not set.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_cryptsetup_parameters',
        )
        return [] if configured is None else shlex.split(configured)

    @staticmethod
    def get_dmcrypt_keysize(args):
        """
        Return the dm-crypt key size: osd_dmcrypt_key_size when set,
        otherwise 1024 for luks and 256 for plain. 0 is returned when
        dm-crypt is not in use.
        """
        configured = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_key_size',
        )
        dmcrypt_type = CryptHelpers.get_dmcrypt_type(args)
        if dmcrypt_type == 'luks':
            if configured is None:
                # As LUKS will hash the 'passphrase' in .luks.key
                # into a key, set a large default
                # so if not updated for some time, it is still a
                # reasonable value.
                return 1024
            return int(configured)
        if dmcrypt_type == 'plain':
            if configured is None:
                # This value is hard-coded in the udev script
                return 256
            LOG.warning('ensure the 95-ceph-osd.rules file has '
                        'been copied to /etc/udev/rules.d '
                        'and modified to call cryptsetup '
                        'with --key-size=%s' % configured)
            return int(configured)
        return 0

    @staticmethod
    def get_dmcrypt_type(args):
        """
        Return 'luks' or 'plain' when args.dmcrypt is set, according to
        osd_dmcrypt_type (luks when unset), and None otherwise.

        :raises Error: if osd_dmcrypt_type is neither luks nor plain
        """
        if not (hasattr(args, 'dmcrypt') and args.dmcrypt):
            return None

        dmcrypt_type = get_conf(
            cluster=args.cluster,
            variable='osd_dmcrypt_type',
        )
        if dmcrypt_type is None or dmcrypt_type == 'luks':
            return 'luks'
        if dmcrypt_type == 'plain':
            return 'plain'
        raise Error('invalid osd_dmcrypt_type parameter '
                    '(must be luks or plain): ', dmcrypt_type)
class Secrets(object):
    """
    The secrets of an OSD, kept in the self.keys dictionary.
    """

    def __init__(self):
        """
        Generate the cephx secret with ceph-authtool --gen-print-key.
        """
        generate = ['ceph-authtool', '--gen-print-key']
        secret, stderr, ret = command(generate)
        LOG.debug("stderr " + stderr)
        assert ret == 0
        self.keys = {'cephx_secret': secret.strip()}

    def write_osd_keyring(self, keyring, osd_id):
        """
        Create the keyring file holding the cephx secret of osd.<osd_id>.

        :param keyring: path of the keyring to create
        :param osd_id: id of the OSD
        """
        authtool = [
            'ceph-authtool', keyring,
            '--create-keyring',
            '--name', 'osd.' + str(osd_id),
            '--add-key', self.keys['cephx_secret'],
        ]
        command_check_call(authtool)
        path_set_context(keyring)

    def get_json(self):
        """
        Return the secrets serialized as JSON, in an ascii bytearray.
        """
        serialized = json.dumps(self.keys)
        return bytearray(serialized, 'ascii')
class LockboxSecrets(Secrets):
    """
    The secrets of an encrypted OSD: in addition to what Secrets holds,
    a random dm-crypt key and the cephx secret of the lockbox.
    """

    def __init__(self, args):
        """
        Generate the dm-crypt key from /dev/urandom and the lockbox
        cephx secret with ceph-authtool --gen-print-key.

        :param args: parsed arguments, used to look up the key size
        """
        super(LockboxSecrets, self).__init__()

        key_size = CryptHelpers.get_dmcrypt_keysize(args)
        # key_size is divided by 8 to get the number of bytes to read
        # (presumably it is expressed in bits). Floor division keeps
        # the count an integer on Python 3, where "/" yields a float
        # that read() refuses. The file is closed as soon as the key
        # was read.
        with open('/dev/urandom', 'rb') as urandom:
            key = urandom.read(key_size // 8)
        base64_key = base64.b64encode(key).decode('ascii')

        secret, stderr, ret = command(['ceph-authtool', '--gen-print-key'])
        LOG.debug("stderr " + stderr)
        assert ret == 0

        self.keys.update({
            'dmcrypt_key': base64_key,
            'cephx_lockbox_secret': secret.strip(),
        })

    def write_lockbox_keyring(self, path, osd_uuid):
        """
        Create <path>/keyring holding the cephx secret of
        client.osd-lockbox.<osd_uuid>.

        :param path: directory in which the keyring is created
        :param osd_uuid: uuid of the OSD the lockbox belongs to
        """
        keyring = os.path.join(path, 'keyring')
        command_check_call(
            [
                'ceph-authtool', keyring,
                '--create-keyring',
                '--name', 'client.osd-lockbox.' + osd_uuid,
                '--add-key', self.keys['cephx_lockbox_secret'],
            ])
        path_set_context(keyring)
2686 class Lockbox(object):
2688 def __init__(self, args):
2690 self.partition = None
2693 if hasattr(self.args, 'lockbox') and self.args.lockbox is None:
2694 self.args.lockbox = self.args.data
2696 def set_partition(self, partition):
2697 self.partition = partition
2701 parser = argparse.ArgumentParser(add_help=False)
2702 parser.add_argument(
2704 help='path to the device to store the lockbox',
2706 parser.add_argument(
2709 help='unique lockbox uuid',
2713 def create_partition(self):
2714 self.device = Device.factory(self.args.lockbox, argparse.Namespace())
2715 partition_number = 5
2716 self.device.create_partition(uuid=self.args.lockbox_uuid,
2718 num=partition_number,
2720 return self.device.get_partition(partition_number)
2722 def set_or_create_partition(self):
2723 if is_partition(self.args.lockbox):
2724 LOG.debug('OSD lockbox device %s is a partition',
2726 self.partition = DevicePartition.factory(
2727 path=None, dev=self.args.lockbox, args=self.args)
2728 ptype = self.partition.get_ptype()
2729 ready = Ptype.get_ready_by_name('lockbox')
2730 if ptype not in ready:
2731 LOG.warning('incorrect partition UUID: %s, expected %s'
2732 % (ptype, str(ready)))
2734 LOG.debug('Creating osd partition on %s',
2736 self.partition = self.create_partition()
2738 def create_key(self):
2739 cluster = self.args.cluster
2740 bootstrap = self.args.prepare_key_template.format(cluster=cluster,
2742 path = self.get_mount_point()
2743 secrets = LockboxSecrets(self.args)
2744 id_arg = self.args.osd_id and [self.args.osd_id] or []
2745 osd_id = command_with_stdin(
2748 '--cluster', cluster,
2749 '--name', 'client.bootstrap-osd',
2750 '--keyring', bootstrap,
2752 'osd', 'new', self.args.osd_uuid,
2756 secrets.write_lockbox_keyring(path, self.args.osd_uuid)
2757 osd_id = must_be_one_line(osd_id)
2758 check_osd_id(osd_id)
2759 write_one_line(path, 'whoami', osd_id)
2760 secrets.write_osd_keyring(os.path.join(path, 'osd_keyring'), osd_id)
2761 write_one_line(path, 'key-management-mode', KEY_MANAGEMENT_MODE_V1)
def symlink_spaces(self, path):
    """
    For every space that has a uuid, make
    STATEDIR/osd-lockbox/<space uuid> a symlink to the lockbox mount
    point and write the uuid to the <name>-uuid file in path.

    :param path: directory receiving the <name>-uuid files
    """
    target = self.get_mount_point()
    for name in Space.NAMES:
        # a missing attribute and an empty value are both skipped
        space_uuid = getattr(self.args, name + '_uuid', None)
        if not space_uuid:
            continue
        symlink = os.path.join(STATEDIR, 'osd-lockbox', space_uuid)
        adjust_symlink(target, symlink)
        write_one_line(path, name + '-uuid', space_uuid)
2774 maybe_mkdir(os.path.join(STATEDIR, 'osd-lockbox'))
2775 args = ['mkfs', '-t', 'ext4', self.partition.get_dev()]
2776 LOG.debug('Creating lockbox fs on %s: ' + str(" ".join(args)))
2777 command_check_call(args)
2778 path = self.get_mount_point()
2780 args = ['mount', '-t', 'ext4', self.partition.get_dev(), path]
2781 LOG.debug('Mounting lockbox ' + str(" ".join(args)))
2782 command_check_call(args)
2783 write_one_line(path, 'osd-uuid', self.args.osd_uuid)
2784 if self.args.cluster_uuid is None:
2785 self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
2786 write_one_line(path, 'ceph_fsid', self.args.cluster_uuid)
2788 self.symlink_spaces(path)
2789 write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC)
2790 if self.device is not None:
2794 '--typecode={num}:{uuid}'.format(
2795 num=self.partition.get_partition_number(),
2796 uuid=self.partition.ptype_for_name('lockbox'),
2799 get_partition_base(self.partition.get_dev()),
2803 def get_mount_point(self):
2804 return os.path.join(STATEDIR, 'osd-lockbox', self.args.osd_uuid)
2806 def get_osd_uuid(self):
2807 return self.args.osd_uuid
2810 path = is_mounted(self.partition.get_dev())
2812 LOG.info("Lockbox already mounted at " + path)
2815 path = tempfile.mkdtemp(
2817 dir=STATEDIR + '/tmp',
2819 args = ['mount', '-t', 'ext4', '-o', 'ro',
2820 self.partition.get_dev(),
2822 LOG.debug('Mounting lockbox temporarily ' + str(" ".join(args)))
2823 command_check_call(args)
2824 self.args.osd_uuid = get_oneliner(path, 'osd-uuid')
2825 command_check_call(['umount', path])
2826 LOG.debug('Mounting lockbox readonly ' + str(" ".join(args)))
2827 args = ['mount', '-t', 'ext4', '-o', 'ro',
2828 self.partition.get_dev(),
2829 self.get_mount_point()]
2830 command_check_call(args)
2831 for name in Space.NAMES + ('osd',):
2832 uuid_path = os.path.join(self.get_mount_point(), name + '-uuid')
2833 if os.path.exists(uuid_path):
2834 uuid = get_oneliner(self.get_mount_point(), name + '-uuid')
2835 dev = os.path.join('/dev/disk/by-partuuid/', uuid.lower())
2836 args = ['ceph-disk', 'trigger', dev]
2837 command_check_call(args)
2840 verify_not_in_use(self.args.lockbox, check_partitions=True)
2841 self.set_or_create_partition()
2845 class PrepareData(object):
2850 def __init__(self, args):
2853 self.partition = None
2855 if self.args.cluster_uuid is None:
2856 self.args.cluster_uuid = get_fsid(cluster=self.args.cluster)
2858 if self.args.osd_uuid is None:
2859 self.args.osd_uuid = str(uuid.uuid4())
2862 dmode = os.stat(self.args.data).st_mode
2864 if stat.S_ISDIR(dmode):
2865 self.type = self.FILE
2866 elif stmode_is_diskdevice(dmode):
2867 self.type = self.DEVICE
2869 raise Error('not a dir or block device', self.args.data)
2872 return self.type == self.FILE
2874 def is_device(self):
2875 return self.type == self.DEVICE
2879 parser = argparse.ArgumentParser(add_help=False)
2880 parser.add_argument(
2882 help='file system type to use (e.g. "ext4")',
2884 parser.add_argument(
2886 action='store_true', default=None,
2887 help='destroy the partition table (and content) of a disk',
2889 parser.add_argument(
2891 action='store_true', default=None,
2892 help='verify that DATA is a dir',
2894 parser.add_argument(
2896 action='store_true', default=None,
2897 help='verify that DATA is a block device',
2899 parser.add_argument(
2902 help='path to OSD data (a disk block device or directory)',
2906 def populate_data_path_file(self, path, *to_prepare_list):
2907 self.populate_data_path(path, *to_prepare_list)
def populate_data_path(self, path, *to_prepare_list):
    """
    Write the files describing the OSD in the data directory, then let
    each space record itself in it.

    Nothing is done when the directory already has a magic file.

    :param path: the OSD data directory
    :param to_prepare_list: the spaces to record in the directory
    """
    if os.path.exists(os.path.join(path, 'magic')):
        LOG.debug('Data dir %s already exists', path)
        return

    LOG.debug('Preparing osd data dir %s', path)

    args = self.args
    if args.osd_uuid is None:
        args.osd_uuid = str(uuid.uuid4())

    write_one_line(path, 'ceph_fsid', args.cluster_uuid)
    write_one_line(path, 'fsid', args.osd_uuid)
    if args.osd_id:
        write_one_line(path, 'wanttobe', args.osd_id)
    if args.crush_device_class:
        write_one_line(path, 'crush_device_class',
                       args.crush_device_class)
    # written after the identification files
    write_one_line(path, 'magic', CEPH_OSD_ONDISK_MAGIC)

    for space in to_prepare_list:
        space.populate_data_path(path)
def prepare(self, *to_prepare_list):
    """
    Prepare the data, which is either a device or a directory.

    :param to_prepare_list: the spaces to prepare along with the data
    :raises Error: if self.type is neither DEVICE nor FILE
    """
    kind = self.type
    if kind == self.DEVICE:
        self.prepare_device(*to_prepare_list)
        return
    if kind == self.FILE:
        self.prepare_file(*to_prepare_list)
        return
    raise Error('unexpected type ', self.type)
2939 def prepare_file(self, *to_prepare_list):
2941 if not os.path.exists(self.args.data):
2942 raise Error('data path for directory does not exist',
2945 if self.args.data_dev:
2946 raise Error('data path is not a block device', self.args.data)
2948 for to_prepare in to_prepare_list:
2949 to_prepare.prepare()
2951 self.populate_data_path_file(self.args.data, *to_prepare_list)
2953 def sanity_checks(self):
2954 if not os.path.exists(self.args.data):
2955 raise Error('data path for device does not exist',
2957 verify_not_in_use(self.args.data,
2958 check_partitions=not self.args.dmcrypt)
def set_variables(self):
    """
    Fill in what is needed to create and mount the data file system.

    - args.fs_type, unless given: osd_mkfs_type, then osd_fs_type,
      then DEFAULT_FS_TYPE
    - self.mkfs_args: osd_mkfs_options_<fs_type>, then
      osd_fs_mkfs_options_<fs_type>
    - self.mount_options: as returned by get_mount_options
    - args.osd_uuid, unless given: a new random uuid
    """
    cluster = self.args.cluster

    # the first variable that is set wins
    for variable in ('osd_mkfs_type', 'osd_fs_type'):
        if self.args.fs_type is not None:
            break
        self.args.fs_type = get_conf(
            cluster=cluster,
            variable=variable,
        )
    if self.args.fs_type is None:
        self.args.fs_type = DEFAULT_FS_TYPE

    self.mkfs_args = get_conf(
        cluster=cluster,
        variable='osd_mkfs_options_{fstype}'.format(
            fstype=self.args.fs_type,
        ),
    )
    if self.mkfs_args is None:
        self.mkfs_args = get_conf(
            cluster=cluster,
            variable='osd_fs_mkfs_options_{fstype}'.format(
                fstype=self.args.fs_type,
            ),
        )

    self.mount_options = get_mount_options(cluster=cluster,
                                           fs_type=self.args.fs_type)

    if self.args.osd_uuid is None:
        self.args.osd_uuid = str(uuid.uuid4())
2994 def prepare_device(self, *to_prepare_list):
2995 self.sanity_checks()
2996 self.set_variables()
2997 if self.args.zap_disk is not None:
3000 def create_data_partition(self):
3001 device = Device.factory(self.args.data, self.args)
3002 partition_number = 1
3003 device.create_partition(uuid=self.args.osd_uuid,
3005 num=partition_number,
3006 size=self.get_space_size())
3007 return device.get_partition(partition_number)
def set_data_partition(self):
    """Set ``self.partition`` to the partition that will hold OSD data.

    When ``self.args.data`` is not a partition, a new data partition is
    created on it.  When it already is a partition it is wrapped as-is and
    a warning is logged if its partition type is not one of the "ready"
    OSD types.
    """
    if not is_partition(self.args.data):
        LOG.debug('Creating osd partition on %s',
                  self.args.data)
        self.partition = self.create_data_partition()
        return

    LOG.debug('OSD data device %s is a partition',
              self.args.data)
    self.partition = DevicePartition.factory(
        path=None, dev=self.args.data, args=self.args)
    actual = self.partition.get_ptype()
    expected = Ptype.get_ready_by_name('osd')
    if actual in expected:
        return
    # Only warn: an unexpected type does not abort preparation here.
    LOG.warning('incorrect partition UUID: %s, expected %s'
                % (actual, str(expected)))
3025 def populate_data_path_device(self, *to_prepare_list):
3026 partition = self.partition
3028 if isinstance(partition, DevicePartitionCrypt):
3037 if self.mkfs_args is not None:
3038 args.extend(self.mkfs_args.split())
3039 if self.args.fs_type == 'xfs':
3040 args.extend(['-f']) # always force
3042 args.extend(MKFS_ARGS.get(self.args.fs_type, []))
3045 partition.get_dev(),
3047 LOG.debug('Creating %s fs on %s',
3048 self.args.fs_type, partition.get_dev())
3049 command_check_call(args, exit=True)
3051 path = mount(dev=partition.get_dev(),
3052 fstype=self.args.fs_type,
3053 options=self.mount_options)
3056 self.populate_data_path(path, *to_prepare_list)
3058 path_set_context(path)
3061 if isinstance(partition, DevicePartitionCrypt):
3064 if not is_partition(self.args.data):
3068 '--typecode=%d:%s' % (partition.get_partition_number(),
3069 partition.ptype_for_name('osd')),
3075 update_partition(self.args.data, 'prepared')
3076 command_check_call(['udevadm', 'trigger',
3079 os.path.basename(partition.rawdev)])
class PrepareFilestoreData(PrepareData):
    """Data preparation steps specific to filestore OSDs."""

    def get_space_size(self):
        """Return the size to request for the data partition (always 0)."""
        as_much_as_possible = 0  # get as much space as possible
        return as_much_as_possible

    def prepare_device(self, *to_prepare_list):
        """Prepare a device: the items first, then the data partition.

        Runs the base-class device preparation, calls ``prepare()`` on
        every item of ``to_prepare_list``, selects/creates the data
        partition and finally populates it.
        """
        base = super(PrepareFilestoreData, self)
        base.prepare_device(*to_prepare_list)
        for item in to_prepare_list:
            item.prepare()
        self.set_data_partition()
        self.populate_data_path_device(*to_prepare_list)

    def populate_data_path(self, path, *to_prepare_list):
        """Populate ``path`` via the base class, then tag it as filestore."""
        base = super(PrepareFilestoreData, self)
        base.populate_data_path(path, *to_prepare_list)
        write_one_line(path, 'type', 'filestore')
3100 class PrepareBluestoreData(PrepareData):
3102 def get_space_size(self):
def prepare_device(self, *to_prepare_list):
    """Prepare a device: the data partition first, then the items.

    Runs the base-class device preparation, selects/creates the data
    partition, calls ``prepare()`` on every item of ``to_prepare_list``
    and finally populates the data partition.
    """
    base = super(PrepareBluestoreData, self)
    base.prepare_device(*to_prepare_list)
    self.set_data_partition()
    for item in to_prepare_list:
        item.prepare()
    self.populate_data_path_device(*to_prepare_list)
def populate_data_path(self, path, *to_prepare_list):
    """Populate ``path`` via the base class, then tag it as bluestore."""
    base = super(PrepareBluestoreData, self)
    base.populate_data_path(path, *to_prepare_list)
    write_one_line(path, 'type', 'bluestore')
3125 monmap = os.path.join(path, 'activate.monmap')
3129 '--cluster', cluster,
3130 '--name', 'client.bootstrap-osd',
3131 '--keyring', keyring,
3132 'mon', 'getmap', '-o', monmap,
3136 osd_type = read_one_line(path, 'type')
3138 if osd_type == 'bluestore':
3142 '--cluster', cluster,
3148 '--setuser', get_ceph_user(),
3149 '--setgroup', get_ceph_group(),
3152 elif osd_type == 'filestore':
3156 '--cluster', cluster,
3161 '--osd-journal', os.path.join(path, 'journal'),
3163 '--setuser', get_ceph_user(),
3164 '--setgroup', get_ceph_group(),
3168 raise Error('unrecognized objectstore type %s' % osd_type)
def get_mount_point(cluster, osd_id):
    """Return the data directory path ``STATEDIR/osd/<cluster>-<osd_id>``."""
    osd_root = STATEDIR + '/osd'
    leaf = '{cluster}-{osd_id}'.format(cluster=cluster, osd_id=osd_id)
    return os.path.join(osd_root, leaf)
3187 LOG.debug('Moving mount to final location...')
3188 osd_data = get_mount_point(cluster, osd_id)
3189 maybe_mkdir(osd_data)
3191 # pick best-of-breed mount options based on fs type
3192 if mount_options is None:
3193 mount_options = MOUNT_OPTIONS.get(fstype, '')
3195 # we really want to mount --move, but that is not supported when
3196 # the parent mount is shared, as it is by default on RH, Fedora,
3197 # and probably others. Also, --bind doesn't properly manipulate
3198 # /etc/mtab, which *still* isn't a symlink to /proc/mounts despite
3199 # this being 2013. Instead, mount the original device at the final
3214 '-l', # lazy, in case someone else is peeking at the
3223 # For upgrade purposes, to make sure there are no competing units,
3224 # both --runtime unit and the default should be disabled. There can be
3225 # two units at the same time: one with --runtime and another without
3226 # it. If, for any reason (manual or ceph-disk) the two units co-exist
3227 # they will compete with each other.
3229 def systemd_disable(
3233 # ensure there is no duplicate ceph-osd@.service
3234 for style in ([], ['--runtime']):
3239 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3248 systemd_disable(path, osd_id)
3249 if os.path.ismount(path):
3250 style = ['--runtime']
3257 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3264 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3273 systemd_disable(path, osd_id)
3278 'ceph-osd@{osd_id}'.format(osd_id=osd_id),
3287 LOG.debug('Starting %s osd.%s...', cluster, osd_id)
3289 path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3290 cluster=cluster, osd_id=osd_id)
3293 if os.path.exists(os.path.join(path, 'upstart')):
3297 # use emit, not start, because start would fail if the
3298 # instance was already running
3300 # since the daemon starting doesn't guarantee much about
3301 # the service being operational anyway, don't bother
3306 'cluster={cluster}'.format(cluster=cluster),
3307 'id={osd_id}'.format(osd_id=osd_id),
3310 elif os.path.exists(os.path.join(path, 'sysvinit')):
3311 if os.path.exists('/usr/sbin/service'):
3312 svc = '/usr/sbin/service'
3314 svc = '/sbin/service'
3320 '{cluster}'.format(cluster=cluster),
3322 'osd.{osd_id}'.format(osd_id=osd_id),
3325 elif os.path.exists(os.path.join(path, 'systemd')):
3326 systemd_start(path, osd_id)
3327 elif os.path.exists(os.path.join(path, 'openrc')):
3328 base_script = '/etc/init.d/ceph-osd'
3329 osd_script = '{base}.{osd_id}'.format(
3333 if not os.path.exists(osd_script):
3334 os.symlink(base_script, osd_script)
3341 elif os.path.exists(os.path.join(path, 'bsdrc')):
3344 '/usr/sbin/service', 'ceph', 'start',
3345 'osd.{osd_id}'.format(osd_id=osd_id),
3349 raise Error('{cluster} osd.{osd_id} '
3350 'is not tagged with an init system'
3355 except subprocess.CalledProcessError as e:
3356 raise Error('ceph osd start failed', e)
3363 LOG.debug('Stoping %s osd.%s...', cluster, osd_id)
3365 path = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3366 cluster=cluster, osd_id=osd_id)
3369 if os.path.exists(os.path.join(path, 'upstart')):
3375 'cluster={cluster}'.format(cluster=cluster),
3376 'id={osd_id}'.format(osd_id=osd_id),
3379 elif os.path.exists(os.path.join(path, 'sysvinit')):
3380 svc = which('service')
3386 '{cluster}'.format(cluster=cluster),
3388 'osd.{osd_id}'.format(osd_id=osd_id),
3391 elif os.path.exists(os.path.join(path, 'systemd')):
3392 systemd_stop(path, osd_id)
3393 elif os.path.exists(os.path.join(path, 'openrc')):
3396 '/etc/init.d/ceph-osd.{osd_id}'.format(osd_id=osd_id),
3400 elif os.path.exists(os.path.join(path, 'bsdrc')):
3403 '/usr/local/etc/rc.d/ceph stop osd.{osd_id}'
3404 .format(osd_id=osd_id),
3408 raise Error('{cluster} osd.{osd_id} '
3409 'is not tagged with an init system'
3410 .format(cluster=cluster, osd_id=osd_id))
3411 except subprocess.CalledProcessError as e:
3412 raise Error('ceph osd stop failed', e)
3415 def detect_fstype(dev):
3417 fstype = _check_output(
3425 fstype = _check_output(
3428 # we don't want stale cached results
3436 fstype = must_be_one_line(fstype)
def dmcrypt_is_mapped(uuid):
    """Return ``/dev/mapper/<uuid>`` if that path exists, else ``None``."""
    mapped = os.path.join('/dev/mapper', uuid)
    return mapped if os.path.exists(mapped) else None
3448 def dmcrypt_map(dev, dmcrypt_key_dir):
3449 ptype = get_partition_type(dev)
3450 if ptype in Ptype.get_ready_by_type('plain'):
3452 cryptsetup_parameters = ['--key-size', '256']
3453 elif ptype in Ptype.get_ready_by_type('luks'):
3455 cryptsetup_parameters = []
3457 raise Error('--dmcrypt called for dev %s with invalid ptype %s'
3459 part_uuid = get_partition_uuid(dev)
3460 dmcrypt_key = get_dmcrypt_key(part_uuid, dmcrypt_key_dir, luks)
3461 return _dmcrypt_map(
3465 cryptsetup_parameters=cryptsetup_parameters,
3473 activate_key_template,
3481 part_uuid = get_partition_uuid(dev)
3482 dev = dmcrypt_map(dev, dmcrypt_key_dir)
3484 fstype = detect_fstype(dev=dev)
3485 except (subprocess.CalledProcessError,
3487 TooManyLinesError) as e:
3488 raise FilesystemTypeError(
3489 'device {dev}'.format(dev=dev),
3493 # TODO always using mount options from cluster=ceph for
3494 # now; see http://tracker.newdream.net/issues/3253
3495 mount_options = get_mount_options(cluster='ceph', fs_type=fstype)
3497 path = mount(dev=dev, fstype=fstype, options=mount_options)
3499 # check if the disk is deactive, change the journal owner, group
3500 # mode for correct user and group.
3501 if os.path.exists(os.path.join(path, 'deactive')):
3502 # logging to syslog makes failures triggered by udev easy to spot
3505 # we need to unmap again because dmcrypt map will create again
3506 # on bootup stage (due to deactivate)
3507 if '/dev/mapper/' in dev:
3508 part_uuid = dev.replace('/dev/mapper/', '')
3509 dmcrypt_unmap(part_uuid)
3510 LOG.info('OSD deactivated! reactivate with: --reactivate')
3511 raise Error('OSD deactivated! reactivate with: --reactivate')
3512 # flag to activate a deactive osd.
3520 (osd_id, cluster) = activate(path, activate_key_template, init)
3522 # Now active successfully
3523 # If we got reactivate and deactive, remove the deactive file
3524 if deactive and reactivate:
3525 os.remove(os.path.join(path, 'deactive'))
3526 LOG.info('Remove `deactive` file.')
3528 # check if the disk is already active, or if something else is already
3532 src_dev = os.stat(path).st_dev
3534 dst_dev = os.stat((STATEDIR + '/osd/{cluster}-{osd_id}').format(
3536 osd_id=osd_id)).st_dev
3537 if src_dev == dst_dev:
3540 parent_dev = os.stat(STATEDIR + '/osd').st_dev
3541 if dst_dev != parent_dev:
3543 elif os.listdir(get_mount_point(cluster, osd_id)):
3544 LOG.info(get_mount_point(cluster, osd_id) +
3545 " is not empty, won't override")
3552 LOG.info('%s osd.%s already mounted in position; unmounting ours.'
3553 % (cluster, osd_id))
3556 raise Error('another %s osd.%s already mounted in position '
3557 '(old/different cluster instance?); unmounting ours.'
3558 % (cluster, osd_id))
3566 mount_options=mount_options,
3568 return cluster, osd_id
3571 LOG.error('Failed to activate')
3575 # remove our temp dir
3576 if os.path.exists(path):
3582 activate_key_template,
3586 if not os.path.exists(path):
3588 'directory %s does not exist' % path
3591 (osd_id, cluster) = activate(path, activate_key_template, init)
3593 if init not in (None, 'none'):
3594 canonical = (STATEDIR + '/osd/{cluster}-{osd_id}').format(
3597 if path != canonical:
3598 # symlink it from the proper location
3600 if os.path.lexists(canonical):
3601 old = os.readlink(canonical)
3603 LOG.debug('Removing old symlink %s -> %s', canonical, old)
3605 os.unlink(canonical)
3607 raise Error('unable to remove old symlink', canonical)
3611 LOG.debug('Creating symlink %s -> %s', canonical, path)
3613 os.symlink(path, canonical)
3615 raise Error('unable to create symlink %s -> %s'
3616 % (canonical, path))
3618 return cluster, osd_id
def find_cluster_by_uuid(_uuid):
    """
    Find a cluster name by searching /etc/ceph/*.conf for a conf file
    with the right uuid.

    The comparison uses the lower-cased ``_uuid``.  Returns the cluster
    name (the conf file name without ``.conf``), or ``None`` when nothing
    matches.  As a special case, when ``ceph`` is the only cluster whose
    conf lacks an fsid, ``'ceph'`` is returned with a warning.
    """
    wanted = _uuid.lower()
    missing_fsid = []
    if not os.path.exists(SYSCONFDIR):
        return None

    suffix = '.conf'
    for entry in os.listdir(SYSCONFDIR):
        if not entry.endswith(suffix):
            continue
        cluster = entry[:-len(suffix)]
        try:
            fsid = get_fsid(cluster)
        except Error as e:
            # Only a missing fsid is tolerated; anything else propagates.
            if 'getting cluster uuid from configuration failed' not in str(e):
                raise e
            missing_fsid.append(cluster)
            continue
        if fsid == wanted:
            return cluster

    # be tolerant of /etc/ceph/ceph.conf without an fsid defined.
    if missing_fsid == ['ceph']:
        LOG.warning('No fsid defined in ' + SYSCONFDIR +
                    '/ceph.conf; using anyway')
        return 'ceph'
    return None
3653 activate_key_template,
3657 check_osd_magic(path)
3659 ceph_fsid = read_one_line(path, 'ceph_fsid')
3660 if ceph_fsid is None:
3661 raise Error('No cluster uuid assigned.')
3662 LOG.debug('Cluster uuid is %s', ceph_fsid)
3664 cluster = find_cluster_by_uuid(ceph_fsid)
3666 raise Error('No cluster conf found in ' + SYSCONFDIR +
3667 ' with fsid %s' % ceph_fsid)
3668 LOG.debug('Cluster name is %s', cluster)
3670 fsid = read_one_line(path, 'fsid')
3672 raise Error('No OSD uuid assigned.')
3673 LOG.debug('OSD uuid is %s', fsid)
3675 keyring = activate_key_template.format(cluster=cluster,
3678 osd_id = get_osd_id(path)
3680 osd_id = allocate_osd_id(
3686 write_one_line(path, 'whoami', osd_id)
3687 LOG.debug('OSD id is %s', osd_id)
3689 if not os.path.exists(os.path.join(path, 'ready')):
3690 LOG.debug('Initializing OSD...')
3691 # re-running mkfs is safe, so just run until it completes
3700 if init not in (None, 'none'):
3702 conf_val = get_conf(
3706 if conf_val is not None:
3711 LOG.debug('Marking with init system %s', init)
3712 init_path = os.path.join(path, init)
3713 with open(init_path, 'w'):
3714 path_set_context(init_path)
3716 # remove markers for others, just in case.
3717 for other in INIT_SYSTEMS:
3720 os.unlink(os.path.join(path, other))
3724 if not os.path.exists(os.path.join(path, 'active')):
3725 write_one_line(path, 'active', 'ok')
3726 LOG.debug('%s osd.%s data dir is ready at %s', cluster, osd_id, path)
3727 return (osd_id, cluster)
3730 def main_activate(args):
3734 LOG.info('path = ' + str(args.path))
3735 if not os.path.exists(args.path):
3736 raise Error('%s does not exist' % args.path)
3738 if is_suppressed(args.path):
3739 LOG.info('suppressed activate request on %s', args.path)
3743 mode = os.stat(args.path).st_mode
3744 if stmode_is_diskdevice(mode):
3745 if (is_partition(args.path) and
3746 (get_partition_type(args.path) ==
3747 PTYPE['mpath']['osd']['ready']) and
3748 not is_mpath(args.path)):
3749 raise Error('%s is not a multipath block device' %
3751 (cluster, osd_id) = mount_activate(
3753 activate_key_template=args.activate_key_template,
3754 init=args.mark_init,
3755 dmcrypt=args.dmcrypt,
3756 dmcrypt_key_dir=args.dmcrypt_key_dir,
3757 reactivate=args.reactivate,
3759 osd_data = get_mount_point(cluster, osd_id)
3761 args.cluster = cluster
3763 for name in Space.NAMES:
3764 # Check if encrypted device in journal
3765 dev_path = os.path.join(osd_data, name + '_dmcrypt')
3766 if not os.path.exists(dev_path):
3768 partition = DevicePartition.factory(
3772 partition.rawdev = args.path
3775 elif stat.S_ISDIR(mode):
3776 (cluster, osd_id) = activate_dir(
3778 activate_key_template=args.activate_key_template,
3779 init=args.mark_init,
3781 osd_data = args.path
3784 raise Error('%s is not a directory or block device' % args.path)
3786 # exit with 0 if the journal device is not up, yet
3787 # journal device will do the activation
3788 osd_journal = '{path}/journal'.format(path=osd_data)
3789 if os.path.islink(osd_journal) and not os.access(osd_journal, os.F_OK):
3790 LOG.info("activate: Journal not present, not starting, yet")
3793 if (not args.no_start_daemon and args.mark_init == 'none'):
3797 '--cluster={cluster}'.format(cluster=cluster),
3798 '--id={osd_id}'.format(osd_id=osd_id),
3799 '--osd-data={path}'.format(path=osd_data),
3800 '--osd-journal={journal}'.format(journal=osd_journal),
3804 if (not args.no_start_daemon and
3805 args.mark_init not in (None, 'none')):
3813 def main_activate_lockbox(args):
3815 main_activate_lockbox_protected(args)
3818 def main_activate_lockbox_protected(args):
3819 partition = DevicePartition.factory(
3820 path=None, dev=args.path, args=args)
3822 lockbox = Lockbox(args)
3823 lockbox.set_partition(partition)
3827 ###########################
3829 def _mark_osd_out(cluster, osd_id):
3830 LOG.info('Prepare to mark osd.%d out...', osd_id)
def _check_osd_status(cluster, osd_id):
    """
    report the osd status:
    00(0) : means OSD OUT AND DOWN
    01(1) : means OSD OUT AND UP
    10(2) : means OSD IN AND DOWN
    11(3) : means OSD IN AND UP

    The status is computed from the JSON output of ``ceph osd dump``:
    an ``in`` value of 1 contributes 2 and an ``up`` value of 1
    contributes 1.

    Raises:
        Error: if no entry for ``osd_id`` is present in the dump.
    """
    LOG.info("Checking osd id: %s ..." % osd_id)
    found = False
    status_code = 0
    out, err, ret = command([
        'ceph',
        'osd',
        'dump',
        '--cluster={cluster}'.format(
            cluster=cluster,
        ),
        '--format',
        'json',
    ])
    out_json = json.loads(out)
    for item in out_json[u'osds']:
        if item.get(u'osd') == int(osd_id):
            found = True
            # Compare by value: `is 1` only works through CPython's
            # small-int caching and is a SyntaxWarning on Python 3.8+.
            if item.get(u'in') == 1:
                status_code += 2
            if item.get(u'up') == 1:
                status_code += 1
    if not found:
        raise Error('Could not osd.%s in osd tree!' % osd_id)
    return status_code
3873 def _remove_osd_directory_files(mounted_path, cluster):
3875 To remove the 'ready', 'active', INIT-specific files.
3877 if os.path.exists(os.path.join(mounted_path, 'ready')):
3878 os.remove(os.path.join(mounted_path, 'ready'))
3879 LOG.info('Remove `ready` file.')
3881 LOG.info('`ready` file is already removed.')
3883 if os.path.exists(os.path.join(mounted_path, 'active')):
3884 os.remove(os.path.join(mounted_path, 'active'))
3885 LOG.info('Remove `active` file.')
3887 LOG.info('`active` file is already removed.')
3889 # Just check `upstart` and `sysvinit` directly if filename is init-spec.
3890 conf_val = get_conf(
3894 if conf_val is not None:
3898 os.remove(os.path.join(mounted_path, init))
3899 LOG.info('Remove `%s` file.', init)
3903 def main_deactivate(args):
3905 main_deactivate_locked(args)
3908 def main_deactivate_locked(args):
3909 osd_id = args.deactivate_by_id
3913 devices = list_devices()
3915 # list all devices and find the one we need
3916 for device in devices:
3917 if 'partitions' in device:
3918 for dev_part in device.get('partitions'):
3920 'whoami' in dev_part and
3921 dev_part['whoami'] == osd_id):
3922 target_dev = dev_part
3924 'path' in dev_part and
3925 dev_part['path'] == path):
3926 target_dev = dev_part
3928 raise Error('Cannot find any match device!!')
3930 # set up all the variables we need
3931 osd_id = target_dev['whoami']
3932 part_type = target_dev['ptype']
3933 mounted_path = target_dev['mount']
3934 if Ptype.is_dmcrypt(part_type, 'osd'):
3937 # Do not do anything if osd is already down.
3938 status_code = _check_osd_status(args.cluster, osd_id)
3939 if status_code == OSD_STATUS_IN_UP:
3940 if args.mark_out is True:
3941 _mark_osd_out(args.cluster, int(osd_id))
3942 stop_daemon(args.cluster, osd_id)
3943 elif status_code == OSD_STATUS_IN_DOWN:
3944 if args.mark_out is True:
3945 _mark_osd_out(args.cluster, int(osd_id))
3946 LOG.info("OSD already out/down. Do not do anything now.")
3948 elif status_code == OSD_STATUS_OUT_UP:
3949 stop_daemon(args.cluster, osd_id)
3950 elif status_code == OSD_STATUS_OUT_DOWN:
3951 LOG.info("OSD already out/down. Do not do anything now.")
3955 # remove 'ready', 'active', and INIT-specific files.
3956 _remove_osd_directory_files(mounted_path, args.cluster)
3958 # Write deactivate to osd directory!
3959 with open(os.path.join(mounted_path, 'deactive'), 'w'):
3960 path_set_context(os.path.join(mounted_path, 'deactive'))
3962 unmount(mounted_path, do_rm=not args.once)
3963 LOG.info("Umount `%s` successfully.", mounted_path)
3966 lockbox = os.path.join(STATEDIR, 'osd-lockbox')
3967 command(['umount', os.path.join(lockbox, target_dev['uuid'])])
3969 dmcrypt_unmap(target_dev['uuid'])
3970 for name in Space.NAMES:
3971 if name + '_uuid' in target_dev:
3972 dmcrypt_unmap(target_dev[name + '_uuid'])
3974 ###########################
def _remove_lockbox(uuid, cluster):
    """Unmount the lockbox of ``uuid`` and drop the symlinks pointing at it.

    Does nothing when the ``STATEDIR/osd-lockbox`` directory is absent.
    ``cluster`` is accepted but not used.
    """
    lockbox_dir = os.path.join(STATEDIR, 'osd-lockbox')
    if os.path.exists(lockbox_dir):
        canonical = os.path.join(lockbox_dir, uuid)
        command(['umount', canonical])
        for entry in os.listdir(lockbox_dir):
            candidate = os.path.join(lockbox_dir, entry)
            if not os.path.islink(candidate):
                continue
            if os.readlink(candidate) == canonical:
                os.unlink(candidate)
3989 def destroy_lookup_device(args, predicate, description):
3990 devices = list_devices()
3991 for device in devices:
3992 for partition in device.get('partitions', []):
3993 if partition['type'] == 'lockbox':
3994 if not is_mounted(partition['path']):
3995 main_activate_lockbox_protected(
3996 argparse.Namespace(verbose=args.verbose,
3997 path=partition['path']))
3998 for device in devices:
3999 for partition in device.get('partitions', []):
4000 if partition['dmcrypt']:
4001 dmcrypt_path = dmcrypt_is_mapped(partition['uuid'])
4005 dmcrypt_path = dmcrypt_map(partition['path'],
4006 args.dmcrypt_key_dir)
4008 list_dev_osd(dmcrypt_path, {}, partition)
4010 dmcrypt_unmap(partition['uuid'])
4014 if predicate(partition):
4015 return dmcrypt, partition
4016 raise Error('found no device matching ', description)
4019 def main_destroy(args):
4021 main_destroy_locked(args)
4024 def main_destroy_locked(args):
4025 osd_id = args.destroy_by_id
4030 if not is_partition(path):
4031 raise Error(path + " must be a partition device")
4032 path = os.path.realpath(path)
4035 (dmcrypt, target_dev) = destroy_lookup_device(
4036 args, lambda x: x.get('path') == path,
4039 (dmcrypt, target_dev) = destroy_lookup_device(
4040 args, lambda x: x.get('whoami') == osd_id,
4041 'osd id ' + str(osd_id))
4043 osd_id = target_dev['whoami']
4044 dev_path = target_dev['path']
4045 if target_dev['ptype'] == PTYPE['mpath']['osd']['ready']:
4046 base_dev = get_partition_base_mpath(dev_path)
4048 base_dev = get_partition_base(dev_path)
4050 # Before osd deactivate, we cannot destroy it
4051 status_code = _check_osd_status(args.cluster, osd_id)
4052 if status_code != OSD_STATUS_OUT_DOWN and \
4053 status_code != OSD_STATUS_IN_DOWN:
4054 raise Error("Could not destroy the active osd. (osd-id: %s)" %
4061 LOG.info("Prepare to %s osd.%s" % (action, osd_id))
4067 '--yes-i-really-mean-it',
4070 # we remove the crypt map and device mapper (if dmcrypt is True)
4072 for name in Space.NAMES:
4073 if target_dev.get(name + '_uuid'):
4074 dmcrypt_unmap(target_dev[name + '_uuid'])
4075 _remove_lockbox(target_dev['uuid'], args.cluster)
4077 # Check zap flag. If we found zap flag, we need to find device for
4078 # destroy this osd data.
4079 if args.zap is True:
4080 # erase the osd data
4081 LOG.info("Prepare to zap the device %s" % base_dev)
4085 def get_space_osd_uuid(name, path):
4086 if not os.path.exists(path):
4087 raise Error('%s does not exist' % path)
4089 if not path_is_diskdevice(path):
4090 raise Error('%s is not a block device' % path)
4092 if (is_partition(path) and
4093 get_partition_type(path) in (PTYPE['mpath']['journal']['ready'],
4094 PTYPE['mpath']['block']['ready']) and
4095 not is_mpath(path)):
4096 raise Error('%s is not a multipath block device' %
4100 out = _check_output(
4103 '--get-device-fsid',
4108 except subprocess.CalledProcessError as e:
4110 'failed to get osd uuid/fsid from %s' % name,
4113 value = str(out).split('\n', 1)[0]
4114 LOG.debug('%s %s has OSD UUID %s', name.capitalize(), path, value)
4118 def main_activate_space(name, args):
4119 if not os.path.exists(args.dev):
4120 raise Error('%s does not exist' % args.dev)
4122 if is_suppressed(args.dev):
4123 LOG.info('suppressed activate request on space %s', args.dev)
4132 dev = dmcrypt_map(args.dev, args.dmcrypt_key_dir)
4135 # FIXME: For an encrypted journal dev, does this return the
4136 # cyphertext or plaintext dev uuid!? Also, if the journal is
4137 # encrypted, is the data partition also always encrypted, or
4138 # are mixed pairs supported!?
4139 osd_uuid = get_space_osd_uuid(name, dev)
4140 path = os.path.join('/dev/disk/by-partuuid/', osd_uuid.lower())
4142 if is_suppressed(path):
4143 LOG.info('suppressed activate request on %s', path)
4146 # warn and exit with 0 if the data device is not up, yet
4147 # data device will do the activation
4148 if not os.access(path, os.F_OK):
4149 LOG.info("activate: OSD device not present, not starting, yet")
4152 (cluster, osd_id) = mount_activate(
4154 activate_key_template=args.activate_key_template,
4155 init=args.mark_init,
4156 dmcrypt=args.dmcrypt,
4157 dmcrypt_key_dir=args.dmcrypt_key_dir,
4158 reactivate=args.reactivate,
4167 ###########################
4170 def main_activate_all(args):
4171 dir = '/dev/disk/by-parttypeuuid'
4172 LOG.debug('Scanning %s', dir)
4173 if not os.path.exists(dir):
4176 for name in os.listdir(dir):
4177 if name.find('.') < 0:
4179 (tag, uuid) = name.split('.')
4181 if tag in Ptype.get_ready_by_name('osd'):
4183 if Ptype.is_dmcrypt(tag, 'osd'):
4184 path = os.path.join('/dev/mapper', uuid)
4186 path = os.path.join(dir, name)
4188 if is_suppressed(path):
4189 LOG.info('suppressed activate request on %s', path)
4192 LOG.info('Activating %s', path)
4195 # never map dmcrypt cyphertext devices
4196 (cluster, osd_id) = mount_activate(
4198 activate_key_template=args.activate_key_template,
4199 init=args.mark_init,
4208 except Exception as e:
4210 '{prog}: {msg}'.format(prog=args.prog, msg=e),
4217 raise Error('One or more partitions failed to activate')
4220 ###########################
4223 dev = os.path.realpath(dev)
4224 with open(PROCDIR + '/swaps', 'rb') as proc_swaps:
4225 for line in proc_swaps.readlines()[1:]:
4226 fields = line.split()
4229 swaps_dev = fields[0]
4230 if os.path.isabs(swaps_dev) and os.path.exists(swaps_dev):
4231 swaps_dev = os.path.realpath(swaps_dev)
4232 if swaps_dev == dev:
def get_oneliner(base, name):
    """Return the first line of ``base/name`` as text, or ``None``.

    The line is read in binary mode, stripped of trailing whitespace and
    converted with ``_bytes2str``.  ``None`` is returned when the path is
    not a regular file.
    """
    target = os.path.join(base, name)
    if not os.path.isfile(target):
        return None
    with open(target, 'rb') as _file:
        first_line = _file.readline()
    return _bytes2str(first_line.rstrip())
4245 def get_dev_fs(dev):
4247 fstype, _, ret = command(
4257 fscheck, _, _ = command(
4265 if 'TYPE' in fscheck:
4266 fstype = fscheck.split()[1].split('"')[1]
def split_dev_base_partnum(dev):
    """Split a partition device into its base device and partition number.

    Multipath devices are resolved with the multipath helpers; for any
    other device the number is read from the ``partition`` file under the
    device's block path.

    Returns:
        tuple: ``(base, partnum)``.
    """
    if is_mpath(dev):
        partnum = partnum_mpath(dev)
        base = get_partition_base_mpath(dev)
    else:
        b = block_path(dev)
        # Use a context manager so the file handle is closed promptly
        # instead of being left to the garbage collector.
        with open(os.path.join(b, 'partition')) as partition_file:
            partnum = partition_file.read().strip()
        base = get_partition_base(dev)
    return base, partnum
def get_partition_type(part):
    """Return the ``ID_PART_ENTRY_TYPE`` value looked up for ``part``."""
    wanted = 'ID_PART_ENTRY_TYPE'
    return get_blkid_partition_info(part, wanted)
def get_partition_uuid(part):
    """Return the ``ID_PART_ENTRY_UUID`` value looked up for ``part``."""
    wanted = 'ID_PART_ENTRY_UUID'
    return get_blkid_partition_info(part, wanted)
def get_blkid_partition_info(dev, what=None):
    """Return the ``KEY=value`` pairs blkid reports for ``dev``.

    Args:
        dev: device path handed to ``blkid -o udev -p``.
        what: optional key; when truthy only that key's value is returned
            (``None`` if blkid did not report it).

    Returns:
        dict of every reported key, or a single value when ``what`` is
        given.
    """
    out, _, _ = command(
        [
            'blkid',
            '-o',
            'udev',
            '-p',
            dev,
        ]
    )
    p = {}
    for line in out.splitlines():
        # Split on the first '=' only: a value may itself contain '=',
        # and lines without any '=' carry no key/value pair to record.
        key, sep, value = line.partition('=')
        if not sep:
            continue
        p[key] = value
    if what:
        return p.get(what)
    else:
        return p
def more_osd_info(path, uuid_map, desc):
    """Add OSD details read from the data directory ``path`` to ``desc``.

    Sets ``ceph_fsid``, ``cluster`` (only when an fsid was read) and
    ``whoami``.  For every name in ``Space.NAMES`` whose ``<name>_uuid``
    file yields a value, stores the lower-cased uuid and, when
    ``uuid_map`` knows it, the matching ``<name>_dev``.
    """
    ceph_fsid = get_oneliner(path, 'ceph_fsid')
    desc['ceph_fsid'] = ceph_fsid
    if ceph_fsid:
        desc['cluster'] = find_cluster_by_uuid(ceph_fsid)
    desc['whoami'] = get_oneliner(path, 'whoami')

    for name in Space.NAMES:
        raw_uuid = get_oneliner(path, name + '_uuid')
        if not raw_uuid:
            continue
        space_uuid = raw_uuid.lower()
        desc[name + '_uuid'] = space_uuid
        if space_uuid in uuid_map:
            desc[name + '_dev'] = uuid_map[space_uuid]
4323 def list_dev_osd(dev, uuid_map, desc):
4324 desc['mount'] = is_mounted(dev)
4325 desc['fs_type'] = get_dev_fs(dev)
4326 desc['state'] = 'unprepared'
4328 desc['state'] = 'active'
4329 more_osd_info(desc['mount'], uuid_map, desc)
4330 elif desc['fs_type']:
4332 tpath = mount(dev=dev, fstype=desc['fs_type'], options='')
4335 magic = get_oneliner(tpath, 'magic')
4336 if magic is not None:
4337 desc['magic'] = magic
4338 desc['state'] = 'prepared'
4339 more_osd_info(tpath, uuid_map, desc)
4346 def list_dev_lockbox(dev, uuid_map, desc):
4347 desc['mount'] = is_mounted(dev)
4348 desc['fs_type'] = get_dev_fs(dev)
4349 desc['state'] = 'unprepared'
4351 desc['state'] = 'active'
4352 desc['osd_uuid'] = get_oneliner(desc['mount'], 'osd-uuid')
4353 elif desc['fs_type']:
4355 tpath = tempfile.mkdtemp(prefix='mnt.', dir=STATEDIR + '/tmp')
4356 args = ['mount', '-t', 'ext4', dev, tpath]
4357 LOG.debug('Mounting lockbox ' + str(" ".join(args)))
4358 command_check_call(args)
4359 magic = get_oneliner(tpath, 'magic')
4360 if magic is not None:
4361 desc['magic'] = magic
4362 desc['state'] = 'prepared'
4363 desc['osd_uuid'] = get_oneliner(tpath, 'osd-uuid')
4365 except subprocess.CalledProcessError:
4367 if desc.get('osd_uuid') in uuid_map:
4368 desc['lockbox_for'] = uuid_map[desc['osd_uuid']]
def list_format_lockbox_plain(dev):
    """Return the plain-text description fragments for a lockbox.

    Prefers ``lockbox_for``; falls back to ``osd_uuid``; returns an empty
    list when neither is set.
    """
    owner = dev.get('lockbox_for')
    if owner:
        return ['for ' + owner]
    osd_uuid = dev.get('osd_uuid')
    if osd_uuid:
        return ['for osd ' + osd_uuid]
    return []
def list_format_more_osd_info_plain(dev):
    """Return the plain-text description fragments for an OSD.

    In order: the cluster (or ``unknown cluster <fsid>``) when an fsid is
    known, ``osd.<id>`` when ``whoami`` is set, then one ``<name> <dev>``
    entry for each name in ``Space.NAMES`` that has a ``<name>_dev``.
    """
    parts = []
    if dev.get('ceph_fsid'):
        if dev.get('cluster'):
            cluster_part = 'cluster ' + dev['cluster']
        else:
            cluster_part = 'unknown cluster ' + dev['ceph_fsid']
        parts.append(cluster_part)
    if dev.get('whoami'):
        parts.append('osd.%s' % dev['whoami'])
    parts.extend(name + ' %s' % dev[name + '_dev']
                 for name in Space.NAMES
                 if dev.get(name + '_dev'))
    return parts
4395 def list_format_dev_plain(dev, prefix=''):
4397 if dev['ptype'] == PTYPE['regular']['osd']['ready']:
4398 desc = (['ceph data', dev['state']] +
4399 list_format_more_osd_info_plain(dev))
4400 elif dev['ptype'] in (PTYPE['regular']['lockbox']['ready'],
4401 PTYPE['mpath']['lockbox']['ready']):
4402 desc = (['ceph lockbox', dev['state']] +
4403 list_format_lockbox_plain(dev))
4404 elif Ptype.is_dmcrypt(dev['ptype'], 'osd'):
4405 dmcrypt = dev['dmcrypt']
4406 if not dmcrypt['holders']:
4407 desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
4408 'not currently mapped']
4409 elif len(dmcrypt['holders']) == 1:
4410 holder = get_dev_path(dmcrypt['holders'][0])
4411 desc = ['ceph data (dmcrypt %s %s)' %
4412 (dmcrypt['type'], holder)]
4413 desc += list_format_more_osd_info_plain(dev)
4415 desc = ['ceph data (dmcrypt %s)' % dmcrypt['type'],
4416 'holders: ' + ','.join(dmcrypt['holders'])]
4417 elif Ptype.is_regular_space(dev['ptype']):
4418 name = Ptype.space_ptype_to_name(dev['ptype'])
4419 desc.append('ceph ' + name)
4420 if dev.get(name + '_for'):
4421 desc.append('for %s' % dev[name + '_for'])
4422 elif Ptype.is_dmcrypt_space(dev['ptype']):
4423 name = Ptype.space_ptype_to_name(dev['ptype'])
4424 dmcrypt = dev['dmcrypt']
4425 if dmcrypt['holders'] and len(dmcrypt['holders']) == 1:
4426 holder = get_dev_path(dmcrypt['holders'][0])
4427 desc = ['ceph ' + name + ' (dmcrypt %s %s)' %
4428 (dmcrypt['type'], holder)]
4430 desc = ['ceph ' + name + ' (dmcrypt %s)' % dmcrypt['type']]
4431 if dev.get(name + '_for'):
4432 desc.append('for %s' % dev[name + '_for'])
4434 desc.append(dev['type'])
4435 if dev.get('fs_type'):
4436 desc.append(dev['fs_type'])
4437 elif dev.get('ptype'):
4438 desc.append(dev['ptype'])
4439 if dev.get('mount'):
4440 desc.append('mounted on %s' % dev['mount'])
4441 return '%s%s %s' % (prefix, dev['path'], ', '.join(desc))
4444 def list_format_plain(devices):
4446 for device in devices:
4447 if device.get('partitions'):
4448 lines.append('%s :' % device['path'])
4449 for p in sorted(device['partitions'], key=lambda x: x['path']):
4450 lines.append(list_format_dev_plain(dev=p,
4453 lines.append(list_format_dev_plain(dev=device,
4455 return "\n".join(lines)
def list_dev(dev, uuid_map, space_map):
    """Build the description dict for one device or partition.

    Args:
        dev: device path.
        uuid_map: mapping of partition uuid to device path, handed on to
            the OSD/lockbox helpers.
        space_map: mapping of space partition uuid to the device it
            belongs to; used to fill ``<name>_for``.

    Returns:
        dict with at least ``path``, ``dmcrypt``, ``is_partition``,
        ``ptype`` and ``type``; further keys depend on the partition type.
    """
    info = {
        'path': dev,
        'dmcrypt': {},
    }

    info['is_partition'] = is_partition(dev)
    if info['is_partition']:
        ptype = get_partition_type(dev)
        info['uuid'] = get_partition_uuid(dev)
    else:
        ptype = 'unknown'
    info['ptype'] = ptype
    LOG.info("list_dev(dev = " + dev + ", ptype = " + str(ptype) + ")")
    if ptype in (PTYPE['regular']['osd']['ready'],
                 PTYPE['mpath']['osd']['ready']):
        info['type'] = 'data'
        if ptype == PTYPE['mpath']['osd']['ready']:
            info['multipath'] = True
        list_dev_osd(dev, uuid_map, info)
    elif ptype in (PTYPE['regular']['lockbox']['ready'],
                   PTYPE['mpath']['lockbox']['ready']):
        info['type'] = 'lockbox'
        # Compare against the multipath *lockbox* type: inside this branch
        # ptype can never equal the multipath osd type, so the previous
        # check left 'multipath' unset for multipath lockboxes.
        if ptype == PTYPE['mpath']['lockbox']['ready']:
            info['multipath'] = True
        list_dev_lockbox(dev, uuid_map, info)
    elif ptype == PTYPE['plain']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'plain'
        # Only an unambiguous single holder can be inspected as the OSD.
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif ptype == PTYPE['luks']['osd']['ready']:
        holders = is_held(dev)
        info['type'] = 'data'
        info['dmcrypt']['holders'] = holders
        info['dmcrypt']['type'] = 'LUKS'
        if len(holders) == 1:
            list_dev_osd(get_dev_path(holders[0]), uuid_map, info)
    elif Ptype.is_regular_space(ptype) or Ptype.is_mpath_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        info['type'] = name
        if ptype == PTYPE['mpath'][name]['ready']:
            info['multipath'] = True
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_plain_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'plain'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    elif Ptype.is_luks_space(ptype):
        name = Ptype.space_ptype_to_name(ptype)
        holders = is_held(dev)
        info['type'] = name
        info['dmcrypt']['type'] = 'LUKS'
        info['dmcrypt']['holders'] = holders
        if info.get('uuid') in space_map:
            info[name + '_for'] = space_map[info['uuid']]
    else:
        # Not a ceph partition type: report swap/other plus fs and mount.
        path = is_mounted(dev)
        fs_type = get_dev_fs(dev)
        if is_swap(dev):
            info['type'] = 'swap'
        else:
            info['type'] = 'other'
        if fs_type:
            info['fs_type'] = fs_type
        if path:
            info['mount'] = path

    return info
4537 partmap = list_all_partitions()
4541 for base, parts in sorted(partmap.items()):
4543 dev = get_dev_path(p)
4544 part_uuid = get_partition_uuid(dev)
4546 uuid_map[part_uuid] = dev
4547 ptype = get_partition_type(dev)
4548 LOG.debug("main_list: " + dev +
4549 " ptype = " + str(ptype) +
4550 " uuid = " + str(part_uuid))
4551 if ptype in Ptype.get_ready_by_name('osd'):
4552 if Ptype.is_dmcrypt(ptype, 'osd'):
4553 holders = is_held(dev)
4554 if len(holders) != 1:
4556 dev_to_mount = get_dev_path(holders[0])
4560 fs_type = get_dev_fs(dev_to_mount)
4561 if fs_type is not None:
4562 mount_options = get_mount_options(cluster='ceph',
4565 tpath = mount(dev=dev_to_mount,
4566 fstype=fs_type, options=mount_options)
4568 for name in Space.NAMES:
4569 space_uuid = get_oneliner(tpath,
4572 space_map[space_uuid.lower()] = dev
4578 LOG.debug("main_list: " + str(partmap) + ", uuid_map = " +
4579 str(uuid_map) + ", space_map = " + str(space_map))
4582 for base, parts in sorted(partmap.items()):
4584 disk = {'path': get_dev_path(base)}
4586 for p in sorted(parts):
4587 partitions.append(list_dev(get_dev_path(p),
4590 disk['partitions'] = partitions
4591 devices.append(disk)
4593 device = list_dev(get_dev_path(base), uuid_map, space_map)
4594 device['path'] = get_dev_path(base)
4595 devices.append(device)
4596 LOG.debug("list_devices: " + str(devices))
4602 out, err, ret = command(
4606 '-o', 'name,mountpoint'
4609 except subprocess.CalledProcessError as e:
4610 LOG.info('zfs list -o name,mountpoint '
4611 'fails.\n (Error: %s)' % e)
4613 lines = out.splitlines()
4614 for line in lines[1:]:
4615 vdevline = line.split()
4616 if os.path.exists(os.path.join(vdevline[1], 'active')):
4617 elems = os.path.split(vdevline[1])
4618 print(vdevline[0], "ceph data, active, cluster ceph,", elems[1],
4619 "mounted on:", vdevline[1])
4621 print(vdevline[0] + " other, zfs, mounted on: " + vdevline[1])
4624 def main_list(args):
4627 main_list_freebsd(args)
4629 main_list_protected(args)
def main_list_protected(args):
    """Print the devices reported by list_devices(), optionally filtered.

    When ``args.path`` is given, only devices whose path ends with one of
    the requested paths are shown.  The output is JSON when
    ``args.format`` is ``'json'``, otherwise the plain-text listing
    (nothing is printed when that listing is empty).

    :param args: parsed arguments providing ``path`` and ``format``
    """
    devices = list_devices()
    if args.path:
        # Resolve symlinks for paths that exist; anything else (for
        # instance a bare device name) is matched exactly as given.
        wanted = tuple(
            os.path.realpath(path) if os.path.exists(path) else path
            for path in args.path
        )
        # Match the suffix literally.  Building a regular expression from
        # the path let characters such as '.', '+' or '[' change the
        # match or raise re.error.  str.endswith() with a tuple also
        # lists a device once even when several paths match it.
        selected_devices = [device for device in devices
                            if device['path'].endswith(wanted)]
    else:
        selected_devices = devices
    if args.format == 'json':
        print(json.dumps(selected_devices))
    else:
        output = list_format_plain(selected_devices)
        if output:
            print(output)
4656 def main_list_freebsd(args):
4657 # Currently accommodate only ZFS Filestore partitions
4658 # return a list of VDEVs and mountpoints
4660 # NAME USED AVAIL REFER MOUNTPOINT
4661 # osd0 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.0
4662 # osd1 1.01G 1.32T 1.01G /var/lib/ceph/osd/osd.1
4666 ###########################
4668 # Mark devices that we want to suppress activates on with a
4671 # /var/lib/ceph/tmp/suppress-activate.sdb
4673 # where the last bit is the sanitized device name (/dev/X without the
4674 # /dev/ prefix) and the is_suppressed() check matches a prefix. That
4675 # means suppressing sdb will stop activate on sdb1, sdb2, etc.
4678 def is_suppressed(path):
4679 disk = os.path.realpath(path)
4681 if (not disk.startswith('/dev/') or
4682 not ldev_is_diskdevice(disk)):
4684 base = get_dev_name(disk)
4686 if os.path.exists(SUPPRESS_PREFIX + base): # noqa
def set_suppress(path):
    """Mark the device at *path* so that activation is suppressed for it.

    Creates an empty marker file named SUPPRESS_PREFIX followed by the
    device name.

    :param path: path to the device
    :raises Error: if the resolved path does not exist or *path* is not
                   a disk device
    """
    resolved = os.path.realpath(path)
    if not os.path.exists(resolved):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    name = get_dev_name(resolved)

    # Only the existence of the marker matters, so it is left empty.
    marker = open(SUPPRESS_PREFIX + name, 'w')  # noqa
    marker.close()
    LOG.info('set suppress flag on %s', name)
def unset_suppress(path):
    """Remove the suppress-activate marker of the device at *path*.

    :param path: path to the device
    :raises Error: if the resolved path does not exist, *path* is not a
                   disk device, the device is not under /dev/, no marker
                   exists for it, or the marker cannot be removed
    """
    disk = os.path.realpath(path)
    if not os.path.exists(disk):
        raise Error('does not exist', path)
    if not ldev_is_diskdevice(path):
        raise Error('not a block device', path)
    # Validate explicitly instead of with assert: assertions disappear
    # under "python -O" and an AssertionError is not one of the errors
    # this tool reports to the user.
    if not disk.startswith('/dev/'):
        raise Error('not a device under /dev', path)
    base = get_dev_name(disk)

    fn = SUPPRESS_PREFIX + base  # noqa
    if not os.path.exists(fn):
        raise Error('not marked as suppressed', path)

    try:
        os.unlink(fn)
    except OSError as e:
        raise Error('failed to unsuppress', e)
    LOG.info('unset suppress flag on %s', base)
def main_suppress(args):
    """Subcommand entry point: suppress activation for ``args.path``."""
    target = args.path
    set_suppress(target)
def main_unsuppress(args):
    """Subcommand entry point: stop suppressing activation for ``args.path``."""
    target = args.path
    unset_suppress(target)
4735 for dev in args.dev:
4739 def main_trigger(args):
4740 LOG.debug("main_trigger: " + str(args))
4741 if is_systemd() and not args.sync:
4742 # http://www.freedesktop.org/software/systemd/man/systemd-escape.html
4743 escaped_dev = args.dev[1:].replace('-', '\\x2d')
4744 service = 'ceph-disk@{dev}.service'.format(dev=escaped_dev)
4745 LOG.info('systemd detected, triggering %s' % service)
4755 if is_upstart() and not args.sync:
4756 LOG.info('upstart detected, triggering ceph-disk task')
4762 'dev={dev}'.format(dev=args.dev),
4763 'pid={pid}'.format(pid=os.getpid()),
4768 if get_ceph_user() == 'ceph':
4769 command_check_call(['chown', 'ceph:ceph', args.dev])
4770 parttype = get_partition_type(args.dev)
4771 partid = get_partition_uuid(args.dev)
4773 LOG.info('trigger {dev} parttype {parttype} uuid {partid}'.format(
4779 ceph_disk = ['ceph-disk']
4781 ceph_disk.append('--verbose')
4783 if parttype in (PTYPE['regular']['osd']['ready'],
4784 PTYPE['mpath']['osd']['ready']):
4785 out, err, ret = command(
4793 elif parttype in (PTYPE['plain']['osd']['ready'],
4794 PTYPE['luks']['osd']['ready']):
4795 out, err, ret = command(
4804 elif parttype in (PTYPE['regular']['journal']['ready'],
4805 PTYPE['mpath']['journal']['ready']):
4806 out, err, ret = command(
4814 elif parttype in (PTYPE['plain']['journal']['ready'],
4815 PTYPE['luks']['journal']['ready']):
4816 out, err, ret = command(
4825 elif parttype in (PTYPE['regular']['block']['ready'],
4826 PTYPE['regular']['block.db']['ready'],
4827 PTYPE['regular']['block.wal']['ready'],
4828 PTYPE['mpath']['block']['ready'],
4829 PTYPE['mpath']['block.db']['ready'],
4830 PTYPE['mpath']['block.wal']['ready']):
4831 out, err, ret = command(
4839 elif parttype in (PTYPE['plain']['block']['ready'],
4840 PTYPE['plain']['block.db']['ready'],
4841 PTYPE['plain']['block.wal']['ready'],
4842 PTYPE['luks']['block']['ready'],
4843 PTYPE['luks']['block.db']['ready'],
4844 PTYPE['luks']['block.wal']['ready']):
4845 out, err, ret = command(
4854 elif parttype in (PTYPE['regular']['lockbox']['ready'],
4855 PTYPE['mpath']['lockbox']['ready']):
4856 out, err, ret = command(
4865 raise Error('unrecognized partition type %s' % parttype)
4870 raise Error('return code ' + str(ret))
4877 # A list of (path, uid, gid, blocking, recursive) tuples
4879 ('/usr/bin/ceph-mon', 'root', ROOTGROUP, True, False),
4880 ('/usr/bin/ceph-mds', 'root', ROOTGROUP, True, False),
4881 ('/usr/bin/ceph-osd', 'root', ROOTGROUP, True, False),
4882 ('/usr/bin/radosgw', 'root', ROOTGROUP, True, False),
4883 ('/etc/ceph', 'root', ROOTGROUP, True, True),
4884 ('/var/run/ceph', 'ceph', 'ceph', True, True),
4885 ('/var/log/ceph', 'ceph', 'ceph', True, True),
4886 ('/var/log/radosgw', 'ceph', 'ceph', True, True),
4887 ('/var/lib/ceph', 'ceph', 'ceph', True, False),
4890 # Relabel/chown all files under /var/lib/ceph/ recursively (except for osd)
4891 for directory in glob.glob('/var/lib/ceph/*'):
4892 if directory == '/var/lib/ceph/osd':
4893 fix_table.append((directory, 'ceph', 'ceph', True, False))
4895 fix_table.append((directory, 'ceph', 'ceph', True, True))
4897 # Relabel/chown the osds recursively and in parallel
4898 for directory in glob.glob('/var/lib/ceph/osd/*'):
4899 fix_table.append((directory, 'ceph', 'ceph', False, True))
4901 LOG.debug("fix_table: " + str(fix_table))
4903 # The lists of background processes
4905 permissions_processes = []
4906 selinux_processes = []
4908 # Preliminary checks
4909 if args.selinux or args.all:
4910 out, err, ret = command(['selinuxenabled'])
4912 LOG.error('SELinux is not enabled, please enable it, first.')
4913 raise Error('no SELinux')
4915 for daemon in ['ceph-mon', 'ceph-osd', 'ceph-mds', 'radosgw', 'ceph-mgr']:
4916 out, err, ret = command(['pgrep', daemon])
4918 LOG.error(daemon + ' is running, please stop it, first')
4919 raise Error(daemon + ' running')
4921 # Relabel the basic system data without the ceph files
4922 if args.system or args.all:
4923 c = ['restorecon', '-R', '/']
4924 for directory, _, _, _, _ in fix_table:
4925 # Skip /var/lib/ceph subdirectories
4926 if directory.startswith('/var/lib/ceph/'):
4931 out, err, ret = command(c)
4934 LOG.error("Failed to restore labels of the underlying system")
4936 raise Error("basic restore failed")
4938 # Use find to relabel + chown ~simultaneously
4940 for directory, uid, gid, blocking, recursive in fix_table:
4941 # Skip directories/files that are not installed
4942 if not os.access(directory, os.F_OK):
4950 ':'.join((uid, gid)),
4959 # Just pass -maxdepth 0 for non-recursive calls
4961 c += ['-maxdepth', '0']
4964 out, err, ret = command(c)
4967 LOG.error("Failed to fix " + directory)
4969 raise Error(directory + " fix failed")
4971 all_processes.append(command_init(c))
4973 LOG.debug("all_processes: " + str(all_processes))
4974 for process in all_processes:
4975 out, err, ret = command_wait(process)
4977 LOG.error("A background find process failed")
4979 raise Error("background failed")
4982 if args.permissions:
4983 for directory, uid, gid, blocking, recursive in fix_table:
4984 # Skip directories/files that are not installed
4985 if not os.access(directory, os.F_OK):
4992 ':'.join((uid, gid)),
4998 ':'.join((uid, gid)),
5003 out, err, ret = command(c)
5006 LOG.error("Failed to chown " + directory)
5008 raise Error(directory + " chown failed")
5010 permissions_processes.append(command_init(c))
5012 LOG.debug("permissions_processes: " + str(permissions_processes))
5013 for process in permissions_processes:
5014 out, err, ret = command_wait(process)
5016 LOG.error("A background permissions process failed")
5018 raise Error("background failed")
5020 # Fix SELinux labels
5022 for directory, uid, gid, blocking, recursive in fix_table:
5023 # Skip directories/files that are not installed
5024 if not os.access(directory, os.F_OK):
5040 out, err, ret = command(c)
5043 LOG.error("Failed to restore labels for " + directory)
5045 raise Error(directory + " relabel failed")
5047 selinux_processes.append(command_init(c))
5049 LOG.debug("selinux_processes: " + str(selinux_processes))
5050 for process in selinux_processes:
5051 out, err, ret = command_wait(process)
5053 LOG.error("A background selinux process failed")
5055 raise Error("background failed")
5058 "The ceph files has been fixed, please reboot "
5059 "the system for the changes to take effect."
def setup_statedir(dir):
    """Point the module-level state at *dir* and create it if missing.

    Sets STATEDIR, makes sure *dir* and its ``tmp`` subdirectory exist,
    then rebinds the prepare/activate locks and SUPPRESS_PREFIX to files
    under ``<dir>/tmp``.

    :param dir: directory in which ceph state is kept
    """
    # XXX The following use of globals makes linting
    # really hard. Global state in Python is iffy and
    # should be avoided.
    global STATEDIR, prepare_lock, activate_lock, SUPPRESS_PREFIX

    STATEDIR = dir

    # The parent is listed first: os.mkdir() does not create
    # intermediate directories.
    for needed in (STATEDIR, STATEDIR + "/tmp"):
        if not os.path.exists(needed):
            os.mkdir(needed)

    prepare_lock = FileLock(STATEDIR + '/tmp/ceph-disk.prepare.lock')
    activate_lock = FileLock(STATEDIR + '/tmp/ceph-disk.activate.lock')
    SUPPRESS_PREFIX = STATEDIR + '/tmp/suppress-activate.'
5085 def setup_sysconfdir(dir):
5090 def parse_args(argv):
5091 parser = argparse.ArgumentParser(
5094 parser.add_argument(
5096 action='store_true', default=None,
5097 help='be more verbose',
5099 parser.add_argument(
5101 action='store_true', default=None,
5102 help='log to stdout',
5104 parser.add_argument(
5105 '--prepend-to-path',
5108 help=('prepend PATH to $PATH for backward compatibility '
5109 '(default /usr/bin)'),
5111 parser.add_argument(
5114 default='/var/lib/ceph',
5115 help=('directory in which ceph state is preserved '
5116 '(default /var/lib/ceph)'),
5118 parser.add_argument(
5121 default='/etc/ceph',
5122 help=('directory in which ceph configuration files are found '
5123 '(default /etc/ceph)'),
5125 parser.add_argument(
5129 help='use the given user for subprocesses, rather than ceph or root'
5131 parser.add_argument(
5135 help='use the given group for subprocesses, rather than ceph or root'
5137 parser.set_defaults(
5138 # we want to hold on to this, for later
5142 subparsers = parser.add_subparsers(
5143 title='subcommands',
5144 description='valid subcommands',
5145 help='sub-command help',
5148 Prepare.set_subparser(subparsers)
5149 make_activate_parser(subparsers)
5150 make_activate_lockbox_parser(subparsers)
5151 make_activate_block_parser(subparsers)
5152 make_activate_journal_parser(subparsers)
5153 make_activate_all_parser(subparsers)
5154 make_list_parser(subparsers)
5155 make_suppress_parser(subparsers)
5156 make_deactivate_parser(subparsers)
5157 make_destroy_parser(subparsers)
5158 make_zap_parser(subparsers)
5159 make_trigger_parser(subparsers)
5160 make_fix_parser(subparsers)
5162 args = parser.parse_args(argv)
5166 def make_fix_parser(subparsers):
5167 fix_parser = subparsers.add_parser(
5169 formatter_class=argparse.RawDescriptionHelpFormatter,
5170 description=textwrap.fill(textwrap.dedent("""\
5172 help='fix SELinux labels and/or file permissions')
5174 fix_parser.add_argument(
5176 action='store_true',
5178 help='fix SELinux labels for the non-ceph system data'
5180 fix_parser.add_argument(
5182 action='store_true',
5184 help='fix SELinux labels for ceph data'
5186 fix_parser.add_argument(
5188 action='store_true',
5190 help='fix file permissions for ceph data'
5192 fix_parser.add_argument(
5194 action='store_true',
5196 help='perform all the fix-related operations'
5198 fix_parser.set_defaults(
5204 def make_trigger_parser(subparsers):
5205 trigger_parser = subparsers.add_parser(
5207 formatter_class=argparse.RawDescriptionHelpFormatter,
5208 description=textwrap.fill(textwrap.dedent("""\
5209 The partition given in argument is activated. The type of the
5210 partition (data, lockbox, journal etc.) is detected by its
5211 type. If the init system is upstart or systemd, the activation is
5212 delegated to it and runs asynchronously, which
5213 helps reduce the execution time of udev actions.
5215 help='activate any device (called by udev)')
5216 trigger_parser.add_argument(
5220 trigger_parser.add_argument(
5224 help='cluster name to assign this disk to',
5226 trigger_parser.add_argument(
5228 action='store_true', default=None,
5229 help='map devices with dm-crypt',
5231 trigger_parser.add_argument(
5232 '--dmcrypt-key-dir',
5234 default='/etc/ceph/dmcrypt-keys',
5235 help='directory where dm-crypt keys are stored',
5237 trigger_parser.add_argument(
5239 action='store_true', default=None,
5240 help='do operation synchronously; do not trigger systemd',
5242 trigger_parser.set_defaults(
5245 return trigger_parser
5248 def make_activate_parser(subparsers):
5249 activate_parser = subparsers.add_parser(
5251 formatter_class=argparse.RawDescriptionHelpFormatter,
5252 description=textwrap.fill(textwrap.dedent("""\
5253 Activate the OSD found at PATH (can be a directory
5254 or a device partition, possibly encrypted). When
5255 activated for the first time, a unique OSD id is obtained
5256 from the cluster. If PATH is a directory, a symbolic
5257 link is added in {statedir}/osd/ceph-$id. If PATH is
5258 a partition, it is mounted on {statedir}/osd/ceph-$id.
5259 Finally, the OSD daemon is run.
5261 If the OSD depends on auxiliary partitions (journal, block, ...)
5262 they need to be available otherwise activation will fail. It
5263 may happen if a journal is encrypted and cryptsetup was not
5265 """.format(statedir=STATEDIR))),
5266 help='Activate a Ceph OSD')
5267 activate_parser.add_argument(
5269 action='store_true', default=None,
5270 help='mount a block device [deprecated, ignored]',
5272 activate_parser.add_argument(
5275 help='bootstrap-osd keyring path template (%(default)s)',
5276 dest='activate_key_template',
5278 activate_parser.add_argument(
5280 metavar='INITSYSTEM',
5281 help='init system to manage this dir',
5283 choices=INIT_SYSTEMS,
5285 activate_parser.add_argument(
5286 '--no-start-daemon',
5287 action='store_true', default=None,
5288 help='do not start the daemon',
5290 activate_parser.add_argument(
5293 help='path to block device or directory',
5295 activate_parser.add_argument(
5297 action='store_true', default=None,
5298 help='map DATA and/or JOURNAL devices with dm-crypt',
5300 activate_parser.add_argument(
5301 '--dmcrypt-key-dir',
5303 default='/etc/ceph/dmcrypt-keys',
5304 help='directory where dm-crypt keys are stored',
5306 activate_parser.add_argument(
5308 action='store_true', default=False,
5309 help='activate the deactived OSD',
5311 activate_parser.set_defaults(
5312 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5315 return activate_parser
5318 def make_activate_lockbox_parser(subparsers):
5319 parser = subparsers.add_parser(
5321 formatter_class=argparse.RawDescriptionHelpFormatter,
5322 description=textwrap.fill(textwrap.dedent("""\
5323 Mount the partition found at PATH on {statedir}/osd-lockbox/$uuid
5324 where $uuid uniquely identifies the OSD that needs this lockbox
5325 to retrieve keys from the monitor and unlock its partitions.
5327 If the OSD has one or more auxiliary devices (journal, block, ...)
5328 symbolic links are created at {statedir}/osd-lockbox/$other_uuid
5329 and point to {statedir}/osd-lockbox/$uuid. This will, for instance,
5330 allow a journal encrypted in a partition identified by $other_uuid to
5331 fetch the keys it needs from the monitor.
5333 Finally the OSD is activated, as it would be with ceph-disk activate.
5334 """.format(statedir=STATEDIR))),
5335 help='Activate a Ceph lockbox')
5336 parser.add_argument(
5338 help='bootstrap-osd keyring path template (%(default)s)',
5339 dest='activate_key_template',
5341 parser.add_argument(
5342 '--dmcrypt-key-dir',
5344 default='/etc/ceph/dmcrypt-keys',
5345 help='directory where dm-crypt keys are stored',
5347 parser.add_argument(
5350 help='path to block device',
5352 parser.set_defaults(
5353 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5354 func=main_activate_lockbox,
def make_activate_block_parser(subparsers):
    """Register the activate subcommand for ``block`` devices.

    Returns whatever make_activate_space_parser() returns.
    """
    block_parser = make_activate_space_parser('block', subparsers)
    return block_parser
def make_activate_journal_parser(subparsers):
    """Register the activate subcommand for ``journal`` devices.

    Returns whatever make_activate_space_parser() returns.
    """
    journal_parser = make_activate_space_parser('journal', subparsers)
    return journal_parser
5367 def make_activate_space_parser(name, subparsers):
5368 activate_space_parser = subparsers.add_parser(
5369 'activate-%s' % name,
5370 formatter_class=argparse.RawDescriptionHelpFormatter,
5371 description=textwrap.fill(textwrap.dedent("""\
5372 Activating a {name} partition is only meaningfull
5373 if it is encrypted and it will map it using
5376 Finally the corresponding OSD is activated,
5377 as it would be with ceph-disk activate.
5378 """.format(name=name))),
5379 help='Activate an OSD via its %s device' % name)
5380 activate_space_parser.add_argument(
5383 help='path to %s block device' % name,
5385 activate_space_parser.add_argument(
5388 help='bootstrap-osd keyring path template (%(default)s)',
5389 dest='activate_key_template',
5391 activate_space_parser.add_argument(
5393 metavar='INITSYSTEM',
5394 help='init system to manage this dir',
5396 choices=INIT_SYSTEMS,
5398 activate_space_parser.add_argument(
5400 action='store_true', default=None,
5401 help=('map data and/or auxiliariy (journal, etc.) '
5402 'devices with dm-crypt'),
5404 activate_space_parser.add_argument(
5405 '--dmcrypt-key-dir',
5407 default='/etc/ceph/dmcrypt-keys',
5408 help='directory where dm-crypt keys are stored',
5410 activate_space_parser.add_argument(
5412 action='store_true', default=False,
5413 help='activate the deactived OSD',
5415 activate_space_parser.set_defaults(
5416 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5417 func=lambda args: main_activate_space(name, args),
5419 return activate_space_parser
5422 def make_activate_all_parser(subparsers):
5423 activate_all_parser = subparsers.add_parser(
5425 formatter_class=argparse.RawDescriptionHelpFormatter,
5426 description=textwrap.fill(textwrap.dedent("""\
5427 Activate all OSD partitions found in /dev/disk/by-parttypeuuid.
5428 The partitions containing auxiliary devices (journal, block, ...)
5431 help='Activate all tagged OSD partitions')
5432 activate_all_parser.add_argument(
5435 help='bootstrap-osd keyring path template (%(default)s)',
5436 dest='activate_key_template',
5438 activate_all_parser.add_argument(
5440 metavar='INITSYSTEM',
5441 help='init system to manage this dir',
5443 choices=INIT_SYSTEMS,
5445 activate_all_parser.set_defaults(
5446 activate_key_template='{statedir}/bootstrap-osd/{cluster}.keyring',
5447 func=main_activate_all,
5449 return activate_all_parser
5452 def make_list_parser(subparsers):
5453 list_parser = subparsers.add_parser(
5455 formatter_class=argparse.RawDescriptionHelpFormatter,
5456 description=textwrap.fill(textwrap.dedent("""\
5457 Display all partitions on the system and their
5458 associated Ceph information, if any.
5460 help='List disks, partitions, and Ceph OSDs')
5461 list_parser.add_argument(
5463 help='output format',
5465 choices=['json', 'plain'],
5467 list_parser.add_argument(
5471 help='path to block devices, relative to /sys/block',
5473 list_parser.set_defaults(
5479 def make_suppress_parser(subparsers):
5480 suppress_parser = subparsers.add_parser(
5481 'suppress-activate',
5482 formatter_class=argparse.RawDescriptionHelpFormatter,
5483 description=textwrap.fill(textwrap.dedent("""\
5484 Add a prefix to the list of suppressed device names
5485 so that they are ignored by all activate* subcommands.
5487 help='Suppress activate on a device (prefix)')
5488 suppress_parser.add_argument(
5491 help='path to block device or directory',
5493 suppress_parser.set_defaults(
5497 unsuppress_parser = subparsers.add_parser(
5498 'unsuppress-activate',
5499 formatter_class=argparse.RawDescriptionHelpFormatter,
5500 description=textwrap.fill(textwrap.dedent("""\
5501 Remove a prefix from the list of suppressed device names
5502 so that they are no longer ignored by all
5503 activate* subcommands.
5505 help='Stop suppressing activate on a device (prefix)')
5506 unsuppress_parser.add_argument(
5509 help='path to block device or directory',
5511 unsuppress_parser.set_defaults(
5512 func=main_unsuppress,
5514 return suppress_parser
5517 def make_deactivate_parser(subparsers):
5518 deactivate_parser = subparsers.add_parser(
5520 formatter_class=argparse.RawDescriptionHelpFormatter,
5521 description=textwrap.fill(textwrap.dedent("""\
5522 Deactivate the OSD located at PATH. It stops the OSD daemon
5523 and optionally marks it out (with --mark-out). The content of
5524 the OSD is left untouched.
5526 By default, the, ready, active, INIT-specific files are
5527 removed (so that it is not automatically re-activated by the
5528 udev rules or ceph-disk trigger) and the file deactive is
5529 created to remember the OSD is deactivated.
5531 If the --once option is given, the ready, active, INIT-specific
5532 files are not removed and the OSD will reactivate whenever
5533 ceph-disk trigger is run on one of the devices (journal, data,
5534 block, lockbox, ...).
5536 If the OSD is dmcrypt, remove the data dmcrypt map. When
5537 deactivate finishes, the OSD is down.
5539 help='Deactivate a Ceph OSD')
5540 deactivate_parser.add_argument(
5544 help='cluster name to assign this disk to',
5546 deactivate_parser.add_argument(
5550 help='path to block device or directory',
5552 deactivate_parser.add_argument(
5553 '--deactivate-by-id',
5555 help='ID of OSD to deactive'
5557 deactivate_parser.add_argument(
5559 action='store_true', default=False,
5560 help='option to mark the osd out',
5562 deactivate_parser.add_argument(
5564 action='store_true', default=False,
5565 help='does not need --reactivate to activate again',
5567 deactivate_parser.set_defaults(
5568 func=main_deactivate,
5572 def make_destroy_parser(subparsers):
5573 destroy_parser = subparsers.add_parser(
5575 formatter_class=argparse.RawDescriptionHelpFormatter,
5576 description=textwrap.fill(textwrap.dedent("""\ Destroy the OSD located at PATH. It removes the OSD from the
5577 cluster and marks it destroyed. An OSD must be down before it
5578 can be destroyed. Once it is destroyed, a new OSD can be created
5579 in its place, reusing the same OSD id and position (e.g. after
5580 a failed HDD or SSD is replaced). Alternatively, if the
5581 --purge option is also specified, the OSD is removed from the
5582 CRUSH map and the OSD id is deallocated.""")),
5583 help='Destroy a Ceph OSD')
5584 destroy_parser.add_argument(
5588 help='cluster name to assign this disk to',
5590 destroy_parser.add_argument(
5594 help='path to block device or directory',
5596 destroy_parser.add_argument(
5599 help='ID of OSD to destroy'
5601 destroy_parser.add_argument(
5602 '--dmcrypt-key-dir',
5604 default='/etc/ceph/dmcrypt-keys',
5605 help=('directory where dm-crypt keys are stored '
5606 '(If you don\'t know how it work, '
5607 'dont use it. we have default value)'),
5609 destroy_parser.add_argument(
5611 action='store_true', default=False,
5612 help='option to erase data and partition',
5614 destroy_parser.add_argument(
5616 action='store_true', default=False,
5617 help='option to remove OSD from CRUSH map and deallocate the id',
5619 destroy_parser.set_defaults(
5624 def make_zap_parser(subparsers):
5625 zap_parser = subparsers.add_parser(
5627 formatter_class=argparse.RawDescriptionHelpFormatter,
5628 description=textwrap.fill(textwrap.dedent("""\
5629 Zap/erase/destroy a device's partition table and contents. It
5630 actually uses sgdisk and it's option --zap-all to
5631 destroy both GPT and MBR data structures so that the disk
5632 becomes suitable for repartitioning.
5634 help='Zap/erase/destroy a device\'s partition table (and contents)')
5635 zap_parser.add_argument(
5639 help='path to block device',
5641 zap_parser.set_defaults(
5648 args = parse_args(argv)
5650 setup_logging(args.verbose, args.log_stdout)
5652 if args.prepend_to_path != '':
5653 path = os.environ.get('PATH', os.defpath)
5654 os.environ['PATH'] = args.prepend_to_path + ":" + path
5656 if args.func.__name__ != 'main_trigger':
5657 # trigger may run when statedir is unavailable and does not use it
5658 setup_statedir(args.statedir)
5659 setup_sysconfdir(args.sysconfdir)
5661 global CEPH_PREF_USER
5662 CEPH_PREF_USER = args.setuser
5663 global CEPH_PREF_GROUP
5664 CEPH_PREF_GROUP = args.setgroup
5669 main_catch(args.func, args)
def setup_logging(verbose, log_stdout):
    """Configure logging for the command-line tool.

    :param verbose: when true, log at DEBUG level instead of WARNING
    :param log_stdout: when true, attach a stdout handler to LOG;
                       otherwise configure logging via basicConfig()
    """
    loglevel = logging.DEBUG if verbose else logging.WARNING

    if not log_stdout:
        logging.basicConfig(
            level=loglevel,
            format='%(funcName)s: %(message)s',
        )
        return

    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setLevel(loglevel)
    handler.setFormatter(logging.Formatter('%(funcName)s: %(message)s'))
    LOG.addHandler(handler)
    LOG.setLevel(loglevel)
5691 def main_catch(func, args):
5698 '{prog}: {msg}'.format(
5704 except CephDiskException as error:
5705 exc_name = error.__class__.__name__
5707 '{prog} {exc_name}: {msg}'.format(
5719 if __name__ == '__main__':