Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / src / ceph-create-keys
1 #!/usr/bin/env python
2 import argparse
3 import errno
4 import json
5 import logging
6 import os
7 import subprocess
8 import sys
9 import time
10 import pwd
11 import grp
12
13
# Module-wide logger, named after the basename of the running script.
LOG = logging.getLogger(os.path.basename(sys.argv[0]))

# Values of the 'state' field in the monitor's mon_status output that
# wait_for_quorum() accepts as "in quorum".
QUORUM_STATES = ['leader', 'peon']
17
def get_ceph_uid():
    """Return the numeric uid of the 'ceph' user, or -1 if there is none.

    The -1 fallback is passed straight to os.chown()/os.fchown() by the
    callers in this file, where it means "leave the owner unchanged".
    """
    try:
        return pwd.getpwnam('ceph').pw_uid
    except KeyError:
        # pwd.getpwnam raises KeyError when the account does not exist;
        # anything else (e.g. KeyboardInterrupt) should not be swallowed.
        return -1
24
def get_ceph_gid():
    """Return the numeric gid of the 'ceph' group, or -1 if there is none.

    The -1 fallback is passed straight to os.chown()/os.fchown() by the
    callers in this file, where it means "leave the group unchanged".
    """
    try:
        return grp.getgrnam('ceph').gr_gid
    except KeyError:
        # grp.getgrnam raises KeyError when the group does not exist;
        # anything else (e.g. KeyboardInterrupt) should not be swallowed.
        return -1
31
def wait_for_quorum(cluster, mon_id, wait_count=600):
    """Block until the local ceph-mon reports a quorum state.

    Runs ``ceph ... mon_status`` against the monitor's admin socket and
    retries, sleeping one second between attempts, until the reported
    ``state`` is one of QUORUM_STATES.

    :param cluster: cluster name, used for ``--cluster`` and to build the
        admin socket path
    :param mon_id: id of the monitor whose admin socket is queried
    :param wait_count: maximum number of attempts; with the one second
        sleep per failed attempt the default of 600 is about 10 minutes
    :raises SystemExit: with code ``errno.EINVAL`` if the monitor output
        is not valid JSON, or with an error message if the monitor did
        not reach quorum within ``wait_count`` attempts (including when
        ``wait_count`` is zero or negative)
    """
    # Keep the original budget: the counter below is decremented, so it
    # cannot be used to report how long we waited.
    timeout = wait_count
    command = [
        'ceph',
        '--cluster={cluster}'.format(cluster=cluster),
        '--admin-daemon=/var/run/ceph/{cluster}-mon.{mon_id}.asok'.format(
            cluster=cluster,
            mon_id=mon_id,
            ),
        'mon_status',
        ]

    while wait_count > 0:
        p = subprocess.Popen(args=command, stdout=subprocess.PIPE)
        # communicate() drains stdout and waits for the process to exit.
        out, _ = p.communicate()
        if p.returncode != 0:
            LOG.info('ceph-mon admin socket not ready yet.')
            time.sleep(1)
            wait_count -= 1
            continue

        # "not out" covers both '' and b'' (str vs. bytes pipe output).
        if not out:
            LOG.info('ceph-mon admin socket returned no data')
            time.sleep(1)
            wait_count -= 1
            continue

        try:
            data = json.loads(out)
        except ValueError:
            # json.loads signals malformed input with ValueError (or its
            # subclass JSONDecodeError); unparseable output is fatal.
            LOG.info('failed to parse json %s', out)
            sys.exit(errno.EINVAL)

        state = data['state']
        if state not in QUORUM_STATES:
            LOG.info('ceph-mon is not in quorum: %r', state)
            time.sleep(1)
            wait_count -= 1
            continue

        break
    else:
        # Reached only when the loop ran out of attempts (or never
        # started); a successful "break" skips this clause.
        raise SystemExit(
            "ceph-mon was not able to join quorum within %d seconds" % timeout)
78
79
def get_key(cluster, mon_id, wait_count=600):
    """Fetch or create the client.admin keyring under /etc/ceph.

    Does nothing if ``/etc/ceph/<cluster>.client.admin.keyring`` already
    exists. Otherwise the key is written to a per-process temporary file
    next to the target and renamed into place once a ceph command
    succeeded; the temporary file is always removed afterwards.

    :param cluster: cluster name, used for ``--cluster`` and in the
        keyring paths
    :param mon_id: id of the local monitor whose keyring
        (``/var/lib/ceph/mon/<cluster>-<mon_id>/keyring``) is used to
        authenticate as ``mon.``
    :param wait_count: maximum number of attempts; one second is slept
        after each failed attempt
    :raises SystemExit: with the command's return code if it equals
        EPERM or EACCES, or with an error message once the attempts are
        exhausted (including when ``wait_count`` is zero or negative)
    """
    path = '/etc/ceph/{cluster}.client.admin.keyring'.format(
        cluster=cluster,
        )
    if os.path.exists(path):
        LOG.info('Key exists already: %s', path)
        return
    tmp = '{path}.{pid}.tmp'.format(
        path=path,
        pid=os.getpid(),
        )
    pathdir = os.path.dirname(path)
    if not os.path.exists(pathdir):
        os.makedirs(pathdir)
        # 0o... octal literals are valid on Python 2.6+ and Python 3.
        os.chmod(pathdir, 0o770)
        # A uid/gid of -1 (no 'ceph' user/group) leaves ownership as is.
        os.chown(pathdir, get_ceph_uid(), get_ceph_gid())

    # Keep the original budget: the counter below is decremented, so it
    # cannot be used to report how long we waited.
    timeout = wait_count

    # Loop-invariant pieces of the ceph command lines.
    args_prefix = [
        "ceph",
        '--connect-timeout=20',
        '--cluster={cluster}'.format(cluster=cluster),
        '--name=mon.',
        '--keyring=/var/lib/ceph/mon/{cluster}-{mon_id}/keyring'.format(
            cluster=cluster,
            mon_id=mon_id,
            ),
        ]
    admin_caps = [
        'client.admin',
        'mon', 'allow *',
        'osd', 'allow *',
        'mds', 'allow *',
        'mgr', 'allow *',
        ]

    while wait_count > 0:
        try:
            with open(tmp, 'w') as f:
                # Restrict the file before any key material is written.
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid())
                LOG.info('Talking to monitor...')

                # First try getting the key if it already exists, to handle
                # the case where it exists but doesn't match the caps
                # we would pass into get-or-create.
                returncode = subprocess.call(
                    args=args_prefix + ['auth', 'get', 'client.admin'],
                    stdout=f,
                    )
                if returncode == errno.ENOENT:
                    # No such key yet: create it with full caps.
                    returncode = subprocess.call(
                        args=args_prefix + ['auth', 'get-or-create'] + admin_caps,
                        stdout=f,
                        )
                else:
                    # NOTE(review): this branch also runs when 'auth get'
                    # failed for a reason other than ENOENT -- confirm
                    # that this is intended.
                    returncode = subprocess.call(
                        args=args_prefix + ['auth', 'caps'] + admin_caps,
                        stdout=f,
                        )

            if returncode != 0:
                if returncode in (errno.EPERM, errno.EACCES):
                    # Retrying cannot fix a permission problem.
                    LOG.info('Cannot get or create admin key, permission denied')
                    sys.exit(returncode)
                LOG.info('Cannot get or create admin key')
                time.sleep(1)
                wait_count -= 1
                continue

            os.rename(tmp, path)
            break
        finally:
            # Clean up the temporary file on every path; after a
            # successful rename it is already gone (ENOENT).
            try:
                os.unlink(tmp)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
    else:
        # Reached only when the loop ran out of attempts (or never
        # started); a successful "break" skips this clause.
        raise SystemExit(
            "Could not get or create the admin key after %d seconds" % timeout)
175
176
def bootstrap_key(cluster, type_, wait_count=600):
    """Fetch or create the bootstrap keyring for one daemon type.

    Does nothing if
    ``/var/lib/ceph/bootstrap-<type_>/<cluster>.keyring`` already exists.
    Otherwise ``ceph auth get-or-create client.bootstrap-<type_>`` is run
    with its output going to a per-process temporary file, which is
    renamed into place on success and always removed afterwards.

    A return code of EPERM or EACCES is logged and ends the attempts
    without raising; the keyring is simply not created in that case.

    :param cluster: cluster name, used for ``--cluster`` and in the
        keyring file name
    :param type_: bootstrap type, e.g. 'osd', 'rgw', 'mds' or 'rbd'
    :param wait_count: maximum number of attempts; one second is slept
        after each failed attempt
    :raises SystemExit: with an error message once the attempts are
        exhausted (including when ``wait_count`` is zero or negative)
    """
    path = '/var/lib/ceph/bootstrap-{type}/{cluster}.keyring'.format(
        type=type_,
        cluster=cluster,
        )
    if os.path.exists(path):
        LOG.info('Key exists already: %s', path)
        return
    tmp = '{path}.{pid}.tmp'.format(
        path=path,
        pid=os.getpid(),
        )

    args = [
        'ceph',
        '--connect-timeout=20',
        '--cluster={cluster}'.format(cluster=cluster),
        'auth',
        'get-or-create',
        'client.bootstrap-{type}'.format(type=type_),
        'mon',
        'allow profile bootstrap-{type}'.format(type=type_),
        ]

    pathdir = os.path.dirname(path)
    if not os.path.exists(pathdir):
        os.makedirs(pathdir)
        # 0o... octal literals are valid on Python 2.6+ and Python 3.
        os.chmod(pathdir, 0o770)
        # A uid/gid of -1 (no 'ceph' user/group) leaves ownership as is.
        os.chown(pathdir, get_ceph_uid(), get_ceph_gid())

    # Keep the original budget: the counter below is decremented, so it
    # cannot be used to report how long we waited.
    timeout = wait_count

    while wait_count > 0:
        try:
            with open(tmp, 'w') as f:
                # Restrict the file before any key material is written.
                os.fchmod(f.fileno(), 0o600)
                os.fchown(f.fileno(), get_ceph_uid(), get_ceph_gid())
                LOG.info('Talking to monitor...')
                returncode = subprocess.call(
                    args=args,
                    stdout=f,
                    )
            if returncode != 0:
                if returncode in (errno.EPERM, errno.EACCES):
                    # Retrying cannot fix a permission problem; give up
                    # on this key without failing the whole script.
                    LOG.info('Cannot get or create bootstrap key for %s, permission denied', type_)
                    break
                LOG.info('Cannot get or create bootstrap key for %s', type_)
                time.sleep(1)
                wait_count -= 1
                continue

            os.rename(tmp, path)
            break
        finally:
            # Clean up the temporary file on every path; after a
            # successful rename it is already gone (ENOENT).
            try:
                os.unlink(tmp)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
    else:
        # Reached only when the loop ran out of attempts (or never
        # started); either "break" above skips this clause.
        raise SystemExit(
            "Could not get or create %s bootstrap key after %d seconds"
            % (type_, timeout))
239
240
def parse_args():
    """Parse the script's command line and return the resulting namespace.

    Recognised options: -v/--verbose, --cluster NAME (default 'ceph'),
    --id/-i ID (required) and --timeout/-t TIMEOUT (int, default 600).
    The namespace additionally carries ``prog``, the parser's program
    name.
    """
    cli = argparse.ArgumentParser(
        description='Create Ceph client.admin key when ceph-mon is ready',
        )

    # (flags, keyword arguments) for each option, in help-output order.
    option_table = (
        (('-v', '--verbose'),
         dict(action='store_true', default=None, help='be more verbose')),
        (('--cluster',),
         dict(metavar='NAME', help='name of the cluster')),
        (('--id', '-i'),
         dict(metavar='ID', help='id of a ceph-mon that is coming up',
              required=True)),
        (('--timeout', '-t'),
         dict(metavar='TIMEOUT', type=int,
              help='timeout in seconds to wait')),
        )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    cli.set_defaults(
        cluster='ceph',
        timeout=600,
        # we want to hold on to this, for later
        prog=cli.prog,
        )
    return cli.parse_args()
277
278
def main():
    """Script entry point.

    Parses the command line, configures logging (DEBUG when --verbose is
    given, INFO otherwise), waits for the monitor to join quorum, obtains
    the client.admin key and then the osd, rgw, mds and rbd bootstrap
    keys, in that order. Every step gets the same --timeout value.
    """
    opts = parse_args()

    logging.basicConfig(
        level=logging.DEBUG if opts.verbose else logging.INFO,
        )

    wait_for_quorum(cluster=opts.cluster, mon_id=opts.id, wait_count=opts.timeout)
    get_key(cluster=opts.cluster, mon_id=opts.id, wait_count=opts.timeout)

    for daemon_type in ('osd', 'rgw', 'mds', 'rbd'):
        bootstrap_key(
            cluster=opts.cluster,
            type_=daemon_type,
            wait_count=opts.timeout,
            )
313
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()