initial code repo
[stor4nfv.git] / src / ceph / src / ceph-create-keys
diff --git a/src/ceph/src/ceph-create-keys b/src/ceph/src/ceph-create-keys
new file mode 100755 (executable)
index 0000000..c14c02f
--- /dev/null
@@ -0,0 +1,315 @@
+#!/usr/bin/env python
+import argparse
+import errno
+import json
+import logging
+import os
+import subprocess
+import sys
+import time
+import pwd
+import grp
+
+
+LOG = logging.getLogger(os.path.basename(sys.argv[0]))
+
+QUORUM_STATES = ['leader', 'peon']
+
+def get_ceph_uid():
+    try:
+        uid = pwd.getpwnam('ceph').pw_uid
+    except:
+        uid = -1
+    return uid
+
+def get_ceph_gid():
+    try:
+        gid = grp.getgrnam('ceph').gr_gid
+    except:
+        gid = -1
+    return gid
+
+def wait_for_quorum(cluster, mon_id, wait_count=600):
+    # wait 10 minutes by default
+    while wait_count > 0:
+        p = subprocess.Popen(
+            args=[
+                'ceph',
+                '--cluster={cluster}'.format(cluster=cluster),
+                '--admin-daemon=/var/run/ceph/{cluster}-mon.{mon_id}.asok'.format(
+                    cluster=cluster,
+                    mon_id=mon_id,
+                    ),
+                'mon_status',
+                ],
+            stdout=subprocess.PIPE,
+            )
+        out = p.stdout.read()
+        returncode = p.wait()
+        if returncode != 0:
+            LOG.info('ceph-mon admin socket not ready yet.')
+            time.sleep(1)
+            wait_count -= 1
+            continue
+
+        if out == '':
+            LOG.info('ceph-mon admin socket returned no data')
+            time.sleep(1)
+            wait_count -= 1
+            continue
+
+        try:
+            data = json.loads(out)
+        except:
+            LOG.info('failed to parse json %s', out)
+            sys.exit(errno.EINVAL)
+
+        state = data['state']
+        if state not in QUORUM_STATES:
+            LOG.info('ceph-mon is not in quorum: %r', state)
+            time.sleep(1)
+            wait_count -= 1
+            continue
+
+        break
+
+    if wait_count == 0:
+        raise SystemExit("ceph-mon was not able to join quorum within %d seconds" % wait_count)
+
+
+def get_key(cluster, mon_id, wait_count=600):
+    path = '/etc/ceph/{cluster}.client.admin.keyring'.format(
+        cluster=cluster,
+        )
+    if os.path.exists(path):
+        LOG.info('Key exists already: %s', path)
+        return
+    tmp = '{path}.{pid}.tmp'.format(
+        path=path,
+        pid=os.getpid(),
+        )
+    pathdir = os.path.dirname(path)
+    if not os.path.exists(pathdir):
+        os.makedirs(pathdir)
+        os.chmod(pathdir, 0770)
+        os.chown(pathdir, get_ceph_uid(), get_ceph_gid())
+    while wait_count > 0:
+        try:
+            with file(tmp, 'w') as f:
+                os.fchmod(f.fileno(), 0600)
+                os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid())
+                LOG.info('Talking to monitor...')
+
+                args_prefix = [
+                        "ceph",
+                        '--connect-timeout=20',
+                        '--cluster={cluster}'.format(cluster=cluster),
+                        '--name=mon.',
+                        '--keyring=/var/lib/ceph/mon/{cluster}-{mon_id}/keyring'.format(
+                            cluster=cluster,
+                            mon_id=mon_id,
+                            ),
+                        ]
+
+                # First try getting the key if it already exists, to handle
+                # the case where it exists but doesn't match the caps
+                # we would pass into get-or-create.
+                returncode = subprocess.call(
+                    args=args_prefix + [
+                        'auth',
+                        'get',
+                        'client.admin',
+                        ],
+                    stdout=f,
+                    )
+                if returncode == errno.ENOENT:
+                    returncode = subprocess.call(
+                        args=args_prefix + [
+                            'auth',
+                            'get-or-create',
+                            'client.admin',
+                            'mon', 'allow *',
+                            'osd', 'allow *',
+                            'mds', 'allow *',
+                            'mgr', 'allow *',
+                            ],
+                        stdout=f,
+                        )
+                else:
+                    returncode = subprocess.call(
+                        args=args_prefix + [
+                            'auth',
+                            'caps',
+                            'client.admin',
+                            'mon', 'allow *',
+                            'osd', 'allow *',
+                            'mds', 'allow *',
+                            'mgr', 'allow *',
+                            ],
+                        stdout=f,
+                        )
+
+            if returncode != 0:
+                if returncode == errno.EPERM or returncode == errno.EACCES:
+                    LOG.info('Cannot get or create admin key, permission denied')
+                    sys.exit(returncode)
+                else:
+                    LOG.info('Cannot get or create admin key')
+                    time.sleep(1)
+                    wait_count -= 1
+                    continue
+
+            os.rename(tmp, path)
+            break
+        finally:
+            try:
+                os.unlink(tmp)
+            except OSError as e:
+                if e.errno == errno.ENOENT:
+                    pass
+                else:
+                    raise
+
+    if wait_count == 0:
+        raise SystemExit("Could not get or create the admin key after %d seconds" % wait_count)
+
+
+def bootstrap_key(cluster, type_, wait_count=600):
+    path = '/var/lib/ceph/bootstrap-{type}/{cluster}.keyring'.format(
+        type=type_,
+        cluster=cluster,
+        )
+    if os.path.exists(path):
+        LOG.info('Key exists already: %s', path)
+        return
+    tmp = '{path}.{pid}.tmp'.format(
+        path=path,
+        pid=os.getpid(),
+        )
+
+    args = [
+        'ceph',
+        '--connect-timeout=20',
+        '--cluster={cluster}'.format(cluster=cluster),
+        'auth',
+        'get-or-create',
+        'client.bootstrap-{type}'.format(type=type_),
+        'mon',
+        'allow profile bootstrap-{type}'.format(type=type_),
+        ]
+
+    pathdir = os.path.dirname(path)
+    if not os.path.exists(pathdir):
+        os.makedirs(pathdir)
+        os.chmod(pathdir, 0770)
+        os.chown(pathdir, get_ceph_uid(), get_ceph_gid())
+
+    while wait_count > 0:
+        try:
+            with file(tmp, 'w') as f:
+                os.fchmod(f.fileno(), 0600)
+                os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid())
+                LOG.info('Talking to monitor...')
+                returncode = subprocess.call(
+                    args=args,
+                    stdout=f,
+                    )
+            if returncode != 0:
+                if returncode == errno.EPERM or returncode == errno.EACCES:
+                    LOG.info('Cannot get or create bootstrap key for %s, permission denied', type_)
+                    break
+                else:
+                    LOG.info('Cannot get or create bootstrap key for %s', type_)
+                    time.sleep(1)
+                    wait_count -= 1
+                    continue
+
+            os.rename(tmp, path)
+            break
+        finally:
+            try:
+                os.unlink(tmp)
+            except OSError as e:
+                if e.errno == errno.ENOENT:
+                    pass
+                else:
+                    raise
+    if wait_count == 0:
+        raise SystemExit("Could not get or create %s bootstrap key after %d seconds" % (type_, wait_count))
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Create Ceph client.admin key when ceph-mon is ready',
+        )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true', default=None,
+        help='be more verbose',
+        )
+    parser.add_argument(
+        '--cluster',
+        metavar='NAME',
+        help='name of the cluster',
+        )
+    parser.add_argument(
+        '--id', '-i',
+        metavar='ID',
+        help='id of a ceph-mon that is coming up',
+        required=True,
+        )
+    parser.add_argument(
+        '--timeout', '-t',
+        metavar='TIMEOUT',
+        type=int,
+        help='timeout in seconds to wait',
+        )
+    parser.set_defaults(
+        cluster='ceph',
+        timeout=600,
+        )
+    parser.set_defaults(
+        # we want to hold on to this, for later
+        prog=parser.prog,
+        )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+
+    loglevel = logging.INFO
+    if args.verbose:
+        loglevel = logging.DEBUG
+
+    logging.basicConfig(
+        level=loglevel,
+        )
+
+    wait_for_quorum(cluster=args.cluster, mon_id=args.id, wait_count=args.timeout)
+    get_key(cluster=args.cluster, mon_id=args.id, wait_count=args.timeout)
+
+    bootstrap_key(
+        cluster=args.cluster,
+        type_='osd',
+        wait_count=args.timeout,
+        )
+    bootstrap_key(
+        cluster=args.cluster,
+        type_='rgw',
+        wait_count=args.timeout,
+        )
+    bootstrap_key(
+        cluster=args.cluster,
+        type_='mds',
+        wait_count=args.timeout,
+        )
+    bootstrap_key(
+        cluster=args.cluster,
+        type_='rbd',
+        wait_count=args.timeout,
+        )
+
+if __name__ == '__main__':
+    main()