#!/usr/bin/env python
"""Create the Ceph client.admin and bootstrap keyrings once ceph-mon is ready.

The script waits for the local monitor to report a quorum state, then asks
the monitor for the ``client.admin`` key and for the ``bootstrap-*`` keys and
stores each of them in its keyring file.
"""
import argparse
import errno
import json
import logging
import os
import subprocess
import sys
import time
import pwd
import grp


LOG = logging.getLogger(os.path.basename(sys.argv[0]))

# Monitor states that count as "in quorum".
QUORUM_STATES = ['leader', 'peon']


def get_ceph_uid():
    """Return the uid of the ``ceph`` user, or -1 if there is no such user.

    -1 is passed straight to ``os.chown``/``os.fchown``, which leave the
    owner unchanged for that value.
    """
    try:
        return pwd.getpwnam('ceph').pw_uid
    except KeyError:
        # pwd raises KeyError when the entry cannot be found.
        return -1


def get_ceph_gid():
    """Return the gid of the ``ceph`` group, or -1 if there is no such group.

    -1 is passed straight to ``os.chown``/``os.fchown``, which leave the
    group unchanged for that value.
    """
    try:
        return grp.getgrnam('ceph').gr_gid
    except KeyError:
        # grp raises KeyError when the entry cannot be found.
        return -1


def _ensure_keyring_dir(path):
    """Create the parent directory of *path* (mode 0770, ceph-owned) if missing."""
    pathdir = os.path.dirname(path)
    if not os.path.exists(pathdir):
        os.makedirs(pathdir)
        os.chmod(pathdir, 0o770)
        os.chown(pathdir, get_ceph_uid(), get_ceph_gid())


def _unlink_if_exists(path):
    """Remove *path*; a file that is already gone is not an error."""
    try:
        os.unlink(path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise


def wait_for_quorum(cluster, mon_id, wait_count=600):
    """Block until the monitor *mon_id* of *cluster* reports a quorum state.

    The monitor's admin socket is polled with ``mon_status``; every failed
    attempt sleeps one second and consumes one unit of *wait_count* (so the
    default is roughly ten minutes).

    Raises:
        SystemExit: with ``errno.EINVAL`` if the monitor's reply is not valid
            JSON, or with a message if the attempts are exhausted (including
            when *wait_count* is not positive to begin with).
    """
    timeout = wait_count  # remembered for the error message
    while wait_count > 0:
        p = subprocess.Popen(
            args=[
                'ceph',
                '--cluster={cluster}'.format(cluster=cluster),
                '--admin-daemon=/var/run/ceph/{cluster}-mon.{mon_id}.asok'.format(
                    cluster=cluster,
                    mon_id=mon_id,
                ),
                'mon_status',
            ],
            stdout=subprocess.PIPE,
        )
        # communicate() reads all output, waits for the child and closes
        # the pipe, so retries do not accumulate open descriptors.
        raw, _ = p.communicate()
        returncode = p.returncode

        if returncode != 0:
            LOG.info('ceph-mon admin socket not ready yet.')
            time.sleep(1)
            wait_count -= 1
            continue

        # ``not raw`` is true for both '' (Python 2) and b'' (Python 3).
        if not raw:
            LOG.info('ceph-mon admin socket returned no data')
            time.sleep(1)
            wait_count -= 1
            continue

        try:
            # UnicodeDecodeError and JSON decode errors are both ValueErrors.
            data = json.loads(raw.decode('utf-8'))
        except ValueError:
            LOG.info('failed to parse json %s', raw)
            sys.exit(errno.EINVAL)

        state = data['state']
        if state not in QUORUM_STATES:
            LOG.info('ceph-mon is not in quorum: %r', state)
            time.sleep(1)
            wait_count -= 1
            continue

        break

    if wait_count <= 0:
        raise SystemExit(
            "ceph-mon was not able to join quorum within %d seconds" % timeout)


def get_key(cluster, mon_id, wait_count=600):
    """Fetch (or create) the client.admin key and write it to its keyring.

    The key is written to ``/etc/ceph/<cluster>.client.admin.keyring`` via a
    temporary file that is renamed into place. Nothing is done if the keyring
    already exists. Every failed attempt sleeps one second and consumes one
    unit of *wait_count*.

    Raises:
        SystemExit: with the command's exit status on EPERM/EACCES, or with a
            message if the attempts are exhausted (including when
            *wait_count* is not positive to begin with).
    """
    path = '/etc/ceph/{cluster}.client.admin.keyring'.format(
        cluster=cluster,
    )
    if os.path.exists(path):
        LOG.info('Key exists already: %s', path)
        return
    tmp = '{path}.{pid}.tmp'.format(
        path=path,
        pid=os.getpid(),
    )
    _ensure_keyring_dir(path)

    args_prefix = [
        "ceph",
        '--connect-timeout=20',
        '--cluster={cluster}'.format(cluster=cluster),
        '--name=mon.',
        '--keyring=/var/lib/ceph/mon/{cluster}-{mon_id}/keyring'.format(
            cluster=cluster,
            mon_id=mon_id,
        ),
    ]
    admin_caps = [
        'mon', 'allow *',
        'osd', 'allow *',
        'mds', 'allow *',
        'mgr', 'allow *',
    ]

    timeout = wait_count  # remembered for the error message
    while wait_count > 0:
        try:
            with open(tmp, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid())
                LOG.info('Talking to monitor...')

                # First try getting the key if it already exists, to handle
                # the case where it exists but doesn't match the caps
                # we would pass into get-or-create.
                returncode = subprocess.call(
                    args=args_prefix + ['auth', 'get', 'client.admin'],
                    stdout=f,
                )
                if returncode == errno.ENOENT:
                    returncode = subprocess.call(
                        args=args_prefix + [
                            'auth', 'get-or-create', 'client.admin',
                        ] + admin_caps,
                        stdout=f,
                    )
                else:
                    returncode = subprocess.call(
                        args=args_prefix + [
                            'auth', 'caps', 'client.admin',
                        ] + admin_caps,
                        stdout=f,
                    )

                if returncode != 0:
                    if returncode in (errno.EPERM, errno.EACCES):
                        LOG.info('Cannot get or create admin key, permission denied')
                        sys.exit(returncode)
                    LOG.info('Cannot get or create admin key')
                    time.sleep(1)
                    wait_count -= 1
                    continue

            os.rename(tmp, path)
            break
        finally:
            # After a successful rename the temp file is already gone.
            _unlink_if_exists(tmp)

    if wait_count <= 0:
        raise SystemExit(
            "Could not get or create the admin key after %d seconds" % timeout)


def bootstrap_key(cluster, type_, wait_count=600):
    """Fetch (or create) the ``client.bootstrap-<type_>`` key and store it.

    The key is written to ``/var/lib/ceph/bootstrap-<type_>/<cluster>.keyring``
    via a temporary file that is renamed into place. Nothing is done if the
    keyring already exists. On EPERM/EACCES the attempt is abandoned without
    raising. Every other failed attempt sleeps one second and consumes one
    unit of *wait_count*.

    Raises:
        SystemExit: with a message if the attempts are exhausted (including
            when *wait_count* is not positive to begin with).
    """
    path = '/var/lib/ceph/bootstrap-{type}/{cluster}.keyring'.format(
        type=type_,
        cluster=cluster,
    )
    if os.path.exists(path):
        LOG.info('Key exists already: %s', path)
        return
    tmp = '{path}.{pid}.tmp'.format(
        path=path,
        pid=os.getpid(),
    )

    args = [
        'ceph',
        '--connect-timeout=20',
        '--cluster={cluster}'.format(cluster=cluster),
        'auth',
        'get-or-create',
        'client.bootstrap-{type}'.format(type=type_),
        'mon',
        'allow profile bootstrap-{type}'.format(type=type_),
    ]

    _ensure_keyring_dir(path)

    timeout = wait_count  # remembered for the error message
    while wait_count > 0:
        try:
            with open(tmp, 'w') as f:
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid())
                LOG.info('Talking to monitor...')
                returncode = subprocess.call(
                    args=args,
                    stdout=f,
                )
                if returncode != 0:
                    if returncode in (errno.EPERM, errno.EACCES):
                        LOG.info('Cannot get or create bootstrap key for %s, permission denied', type_)
                        # Give up on this key type without failing the script.
                        break
                    LOG.info('Cannot get or create bootstrap key for %s', type_)
                    time.sleep(1)
                    wait_count -= 1
                    continue

            os.rename(tmp, path)
            break
        finally:
            # After a successful rename the temp file is already gone.
            _unlink_if_exists(tmp)

    if wait_count <= 0:
        raise SystemExit(
            "Could not get or create %s bootstrap key after %d seconds"
            % (type_, timeout))


def parse_args(argv=None):
    """Parse the command line and return the resulting namespace.

    Args:
        argv: argument list to parse; ``None`` (the default) means
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='Create Ceph client.admin key when ceph-mon is ready',
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true', default=None,
        help='be more verbose',
    )
    parser.add_argument(
        '--cluster',
        metavar='NAME',
        help='name of the cluster',
    )
    parser.add_argument(
        '--id', '-i',
        metavar='ID',
        help='id of a ceph-mon that is coming up',
        required=True,
    )
    parser.add_argument(
        '--timeout', '-t',
        metavar='TIMEOUT',
        type=int,
        help='timeout in seconds to wait',
    )
    parser.set_defaults(
        cluster='ceph',
        timeout=600,
    )
    parser.set_defaults(
        # we want to hold on to this, for later
        prog=parser.prog,
    )
    return parser.parse_args(argv)


def main():
    """Wait for quorum, then create the admin key and all bootstrap keys."""
    args = parse_args()

    loglevel = logging.INFO
    if args.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
    )

    wait_for_quorum(cluster=args.cluster, mon_id=args.id,
                    wait_count=args.timeout)
    get_key(cluster=args.cluster, mon_id=args.id, wait_count=args.timeout)

    for type_ in ('osd', 'rgw', 'mds', 'rbd'):
        bootstrap_key(
            cluster=args.cluster,
            type_=type_,
            wait_count=args.timeout,
        )


if __name__ == '__main__':
    main()