Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / src / init-ceph.in
#!/bin/sh
# Start/stop ceph daemons
# chkconfig: 2345 60 80

### BEGIN INIT INFO
# Provides:          ceph
# Default-Start:     2 3 4 5
# Default-Stop:      0 1 6
# Required-Start:    $remote_fs $named $network $time
# Required-Stop:     $remote_fs $named $network $time
# Short-Description: Start Ceph distributed file system daemons at boot time
# Description:       Enable Ceph distributed file system services.
### END INIT INFO

# TODO: on FreeBSD/OSX, use equivalent script file
# Pull in the LSB helper functions when the distribution provides them.
if [ -e /lib/lsb/init-functions ]; then
    . /lib/lsb/init-functions
fi

# Decide where binaries, helper scripts and configuration live.
# Three layouts are recognised, in this order:
#   1. the script is run as ./<name> from a directory other than /etc/init.d
#   2. the current directory is a cmake build directory
#   3. an installed system (paths substituted at build time)
# ASSUME_DEV is 1 for the first two and 0 for the installed layout.
#
# The expansions are quoted so that a working directory or script path
# containing whitespace does not make `[` fail with "too many arguments".
if [ "$(dirname "$0")" = "." ] && [ "$PWD" != "/etc/init.d" ]; then
    # looks like an autotools src dir build
    BINDIR=.
    SBINDIR=.
    LIBEXECDIR=.
    ETCDIR=.
    ASSUME_DEV=1
elif [ -e CMakeCache.txt ] && [ -e bin/init-ceph ]; then
    # looks like a cmake build directory
    CEPH_ROOT=$(grep ceph_SOURCE_DIR CMakeCache.txt | cut -d "=" -f 2)
    BINDIR=bin
    SBINDIR=bin
    LIBEXECDIR=$CEPH_ROOT/src
    ETCDIR=.
    ASSUME_DEV=1
    CEPH_LIB=$CEPH_ROOT/build/lib
    # Prepend the build tree's library directories unless the path is
    # already mentioned.  -F matches the path literally instead of as a
    # regular expression; the assignments are quoted because some /bin/sh
    # implementations word-split the operand of `export`.
    echo "$PYTHONPATH" | grep -qF -- "$CEPH_LIB" || export PYTHONPATH="$CEPH_LIB/cython_modules/lib.2:$PYTHONPATH"
    echo "$LD_LIBRARY_PATH" | grep -qF -- "$CEPH_LIB" || export LD_LIBRARY_PATH="$CEPH_LIB:$LD_LIBRARY_PATH"
    echo "$DYLD_LIBRARY_PATH" | grep -qF -- "$CEPH_LIB" || export DYLD_LIBRARY_PATH="$CEPH_LIB:$DYLD_LIBRARY_PATH"
else
    BINDIR=@bindir@
    SBINDIR=@sbindir@
    LIBEXECDIR=@libexecdir@/ceph
    ETCDIR=@sysconfdir@/ceph
    ASSUME_DEV=0
fi

# An explicit development environment (all three variables set) overrides
# whatever was detected above.
if [ -n "$CEPH_BIN" ] && [ -n "$CEPH_ROOT" ] && [ -n "$CEPH_BUILD_DIR" ]; then
  BINDIR=$CEPH_BIN
  SBINDIR=$CEPH_ROOT/src
  ETCDIR=$CEPH_BIN
  LIBEXECDIR=$CEPH_ROOT/src
  ASSUME_DEV=1
fi

# FreeBSD uses the getopt under /usr/local/bin; elsewhere take it from PATH.
if [ "$(uname)" = FreeBSD ]; then
  GETOPT=/usr/local/bin/getopt
else
  GETOPT=getopt
fi

# Ask the daemons to switch to the ceph user and group, but only when that
# account exists on this host.
if id ceph > /dev/null 2>&1; then
  SET_CEPHUSER_ARGS=" --setuser ceph --setgroup ceph"
fi
66
# Print the synopsis and the option summary on stdout, then leave the
# script.  `exit` is given no argument, so the exit status is that of the
# last printf.
usage_exit() {
    echo "usage: $0 [options] {start|stop|restart|condrestart} [mon|osd|mds]..."
    printf "Core options:\n"
    # One tab-indented line per argument.
    printf '\t%s\n' \
        "--allhosts / -a           execute (via ssh) on all hosts in conf file" \
        "--cluster [cluster name]  define the cluster name" \
        "--conf / -c [conf file]   use [conf file] instead of default" \
        "--help / -h               show this usage message" \
        "--hostname [hostname]     override hostname lookup" \
        "-m [mon addr]             mon address"
    printf "\n"
    printf "Other options:\n"
    printf '\t%s\n' \
        "--btrfs                   btrfs" \
        "--nobtrfs                 no btrfs" \
        "--btrfsumount             btrfs umount" \
        "--fsmount                 fsmount" \
        "--nofsmount               no fsmount" \
        "--fsumount                fsumount" \
        "--restart                 restart on core dump" \
        "--norestart               do not restart on core dump" \
        "--valgrind                run via valgrind" \
        "--novalgrind              do not run via valgrind" \
        "--verbose / -v            be verbose"
    exit
}
91
# Behave if we are not completely installed (e.g., Debian "removed,
# config remains" state): without the shared helper script there is
# nothing to do, so leave quietly with success.
# The path is quoted so a LIBEXECDIR containing whitespace still works.
if [ ! -f "$LIBEXECDIR/ceph_common.sh" ]; then
    exit 0
fi

# Helpers called later in this script (do_cmd, get_conf, check_host, ...)
# are not defined here; presumably they come from this file.
. "$LIBEXECDIR/ceph_common.sh"

# Exit status reported at the very end; daemon actions overwrite it when
# something goes wrong.
EXIT_STATUS=0
99
# Send a signal to the daemon recorded in a pid file.
#
# Arguments:
#   $1  name     daemon name shown in the progress message
#   $2  daemon   string that must appear in the process's command line
#   $3  pidfile  file holding the daemon's pid
#   $4  signal   option handed to kill (e.g. -1); may be empty
#   $5  action   verb for the progress message, "Stopping" when omitted
#
# Reads the global $host for the message.  The check-and-kill snippet is
# passed to do_cmd (defined outside this file) as a single string; kill is
# only issued when the pid file exists and the process's command line
# matches $daemon.
#
# The assignments are plain globals (POSIX sh has no `local`), so they
# stay visible to the caller after the function returns.
signal_daemon() {
    name=$1
    daemon=$2
    pidfile=$3
    signal=$4
    action=${5:-Stopping}
    # Print through '%s' so a '%' or backslash in the text is shown as-is
    # instead of being interpreted as a printf directive.
    printf '%s' "$action Ceph $name on $host..."
    do_cmd "if [ -e $pidfile ]; then
        pid=\`cat $pidfile\`
        if ps -p \$pid -o args= | grep -q $daemon; then
            cmd=\"kill $signal \$pid\"
            printf \"\$cmd...\"
            \$cmd
        fi
    fi"
    echo done
}
118
# Tell whether the daemon recorded in a pid file is alive.
#
#   $1  name       daemon name (stored, not otherwise used here)
#   $2  daemon     string expected in the process's command line
#   $3  daemon_id  id expected right after "-i" in the command line
#   $4  pidfile    file holding the daemon's pid
#
# The probe is handed to do_cmd as one string, with an empty second
# argument and "okfail" as the third; do_cmd's status becomes this
# function's status.  The probe itself exits 0 only when the pid file
# exists and the process's command line matches both $daemon and
# "-i<any char>$daemon_id".
daemon_is_running() {
    name=$1 daemon=$2 daemon_id=$3 pidfile=$4
    do_cmd "[ -e $pidfile ] || exit 1   # no pid, presumably not running
        pid=\`cat $pidfile\`
        ps -p \$pid -o args= | grep $daemon | grep -qwe -i.$daemon_id && exit 0 # running
        exit 1  # pid is something else" "" "okfail"
}
129
# Keep signalling the daemon recorded in a pid file until it is gone.
#
# Arguments:
#   $1  name     daemon name shown in the progress message
#   $2  daemon   string that must appear in the process's command line
#   $3  pidfile  file holding the daemon's pid
#   $4  signal   option handed to kill (e.g. -9); may be empty
#   $5  action   verb for the progress message, "Stopping" when omitted
#
# Reads the global $host for the message.  The snippet passed to do_cmd
# (defined outside this file) re-sends the signal once per second for as
# long as the process's command line still matches $daemon.
#
# The assignments are plain globals (POSIX sh has no `local`), so they
# stay visible to the caller after the function returns.
stop_daemon() {
    name=$1
    daemon=$2
    pidfile=$3
    signal=$4
    action=${5:-Stopping}
    # Print through '%s' so a '%' or backslash in the text is shown as-is
    # instead of being interpreted as a printf directive.
    printf '%s' "$action Ceph $name on $host..."
    do_cmd "if [ -e $pidfile ]; then
        pid=\`cat $pidfile\`
        while ps -p \$pid -o args= | grep -q $daemon; do
            cmd=\"kill $signal \$pid\"
            printf \"\$cmd...\"
            \$cmd
            sleep 1
        done
    fi"
    echo done
}
150
## command line options
# Every option consumed below (and its argument, if any) is appended to
# $options so the restart/condrestart actions can re-run this script with
# the same flags.
options=

# Let getopt validate and normalise the arguments; a parse error ends the
# script with status 1.
OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "$@") || exit 1

eval set -- "$OPTS"

# Defaults, possibly overridden by the files sourced next and by the
# options handled in the loop.
verbose=0
allhosts=0
dovalgrind=
docrun=
monaddr=
dofsmount=1
dofsumount=0
use_default_conf=1

## set variables like cluster or conf
if [ -e /etc/sysconfig/ceph ]; then
    . /etc/sysconfig/ceph
fi
if [ -e /etc/default/ceph ]; then
    . /etc/default/ceph
fi


# Consume leading arguments for as long as they start with a dash.
# Options that take a value record the flag and shift inside their arm;
# the shared tail after `esac` then records and shifts the value.
while echo $1 | grep -q '^-'; do
case $1 in
    -v | --verbose) verbose=1 ;;
    --valgrind)     dovalgrind=1 ;;
    --novalgrind)   dovalgrind=0 ;;
    --allhosts | -a) allhosts=1 ;;
    --restart)      docrun=1 ;;
    --norestart)    docrun=0 ;;
    -h | --help)    usage_exit ;;
    -m)
            if [ -z "$2" ]; then usage_exit; fi
            options="$options $1"
            shift
            MON_ADDR=$1
            ;;
    --btrfs | --fsmount)       dofsmount=1 ;;
    --nobtrfs | --nofsmount)   dofsmount=0 ;;
    --btrfsumount | --fsumount) dofsumount=1 ;;
    --conf | -c)
            if [ -z "$2" ]; then usage_exit; fi
            options="$options $1"
            shift
            use_default_conf=0
            conf=$1
            ;;
    --cluster)
            if [ -z "$2" ]; then usage_exit; fi
            options="$options $1"
            shift
            cluster=$1
            ;;
    --hostname)
            if [ -z "$2" ]; then usage_exit; fi
            options="$options $1"
            shift
            hostname=$1
            ;;
    --)
            # end of options: drop the marker and stop parsing
            shift
            break
            ;;
    *)
            echo unrecognized option \'$1\'
            usage_exit
            ;;
esac
options="$options $1"
shift
done
245
# if `--cluster` was not passed in, fallback to looking at the config name:
# the cluster is the conf file's basename up to its first dot.
if [ -z "$cluster" ]; then
    cluster=$(echo "$conf" | awk -F'/' '{print $(NF)}' | cut -d'.' -f 1)
elif [ $use_default_conf -eq 1 ]; then
    # if we were told to use a given cluster name then $conf needs to be updated
    # but just define it if `--conf` was not specified, otherwise we would be silently
    # overriding $conf even if it was defined with `--conf`
    conf="/etc/ceph/$cluster.conf"
fi


# verify_conf, get_local_name_list and get_name_list are not defined in
# this script; presumably they come from ceph_common.sh.
verify_conf

# First remaining argument is the action; anything after it is passed on
# as the list of daemons to act on.
command=$1
[ -n "$*" ] && shift

get_local_name_list
get_name_list "$@"

# Reverse the order if we are stopping

if [ "$command" = "stop" ] || [ "$command" = "onestop" ]; then
    # Start from an empty list so a new_order value inherited from the
    # environment or a sourced defaults file cannot leak into $what.
    new_order=""
    for f in $what; do
       new_order="$f $new_order"
    done
    what="$new_order"
fi
275
# Apply $command to every daemon listed in $what, one at a time.
#
# Each entry is split into a three-character type and an id and rebuilt as
# "<type>.<id>".  Helpers used here (check_host, get_conf, get_conf_bool,
# do_cmd, do_cmd_okfail, do_root_cmd, do_root_cmd_okfail) and the variables
# $host, $ssh and $ERR are not defined in this script; presumably they come
# from ceph_common.sh.
for name in $what; do
    type=$(echo $name | cut -c 1-3)   # e.g. 'mon', if $name is 'mon1'
    id=$(echo $name | cut -c 4- | sed 's/^\.//')
    num=$id
    name="$type.$id"

    # The filesystem device settings are only looked up for OSDs further
    # down.  Clear whatever a previous OSD iteration left behind so that a
    # following mon/mds entry is never mounted or unmounted with another
    # daemon's devices.
    fs_devs=""
    first_dev=""

    check_host $cluster || continue

    binary="$BINDIR/ceph-$type"
    cmd="$binary -i $id"
    if [ $ASSUME_DEV -eq 1 ]; then
      cmd="PATH=$PWD:$PATH $cmd"
    fi

    get_conf run_dir "/var/run/ceph" "run dir"

    get_conf pid_file "$run_dir/$type.$id.pid" "pid file"

    if [ "$command" = "start" ] || [ "$command" = "onestart" ]; then
        if [ -n "$pid_file" ]; then
            do_cmd "mkdir -p $(dirname "$pid_file")"
            cmd="$cmd --pid-file $pid_file"
        fi

        get_conf log_dir "" "log dir"
        [ -n "$log_dir" ] && do_cmd "mkdir -p $log_dir"

        get_conf auto_start "" "auto start"
        if [ "$auto_start" = "no" ] || [ "$auto_start" = "false" ] || [ "$auto_start" = "0" ]; then
            # Daemons with auto start disabled are only skipped when no
            # daemon was named on the command line.  Counting the arguments
            # avoids the `[` error that `-z "$@"` produced with two or more
            # names.
            if [ $# -eq 0 ]; then
                echo "Skipping Ceph $name on $host... auto start is disabled"
                continue
            fi
        fi

        if daemon_is_running $name ceph-$type $id $pid_file; then
            echo "Starting Ceph $name on $host...already running"
            continue
        fi

        get_conf copy_executable_to "" "copy executable to"
        if [ -n "$copy_executable_to" ]; then
            scp $binary "$host:$copy_executable_to"
            # NOTE(review): $cmd was already built from the old $binary
            # above, so this assignment does not change what is started.
            binary="$copy_executable_to"
        fi
    fi

    # conf file
    cmd="$cmd -c $conf"

    if echo $name | grep -q ^osd; then
        get_conf osd_data "/var/lib/ceph/osd/$cluster-$id" "osd data"
        get_conf fs_path "$osd_data" "fs path"  # mount point defaults to osd data
        get_conf fs_devs "" "devs"
        if [ -z "$fs_devs" ]; then
            # try to fallback to old keys
            get_conf tmp_btrfs_devs "" "btrfs devs"
            if [ -n "$tmp_btrfs_devs" ]; then
                fs_devs="$tmp_btrfs_devs"
            fi
        fi
        first_dev=$(echo $fs_devs | cut '-d ' -f 1)
    fi

    # do lockfile, if RH
    # (the lock file is only used when its directory exists on this host)
    get_conf lockfile "/var/lock/subsys/ceph" "lock file"
    lockdir=$(dirname "$lockfile")
    if [ ! -d "$lockdir" ]; then
        lockfile=""
    fi

    get_conf asok "$run_dir/$cluster-$type.$id.asok" "admin socket"

    case "$command" in
        start|onestart)
            # Increase max_open_files, if the configuration calls for it.
            get_conf max_open_files "32768" "max open files"

            # build final command; every piece starts empty so nothing is
            # carried over from the previous daemon
            wrap=""
            runmode=""
            runarg=""
            files=""
            tcmalloc=""

            [ -z "$docrun" ] && get_conf_bool docrun "0" "restart on core dump"
            [ "$docrun" -eq 1 ] && wrap="$BINDIR/ceph-run"

            # NOTE(review): --valgrind/--novalgrind only set $dovalgrind;
            # $valgrind itself is filled in solely by the conf lookup.
            [ -z "$dovalgrind" ] && get_conf_bool valgrind "" "valgrind"
            [ -n "$valgrind" ] && wrap="$wrap valgrind $valgrind"

            # a wrapped daemon is run in the foreground (-f) and backgrounded
            # by the shell
            [ -n "$wrap" ] && runmode="-f &" && runarg="-f"
            [ -n "$max_open_files" ] && files="ulimit -n $max_open_files;"

            [ -n "$TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES" ] && tcmalloc="TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=$TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES"

            cmd="$files $tcmalloc $wrap $cmd --cluster $cluster ${SET_CEPHUSER_ARGS} $runmode"

            if [ $dofsmount -eq 1 ] && [ -n "$fs_devs" ]; then
                get_conf pre_mount "true" "pre mount command"
                get_conf fs_type "" "osd mkfs type"

                if [ -z "$fs_type" ]; then
                    # try to fallback to old keys
                    get_conf tmp_devs "" "btrfs devs"
                    if [ -n "$tmp_devs" ]; then
                        fs_type="btrfs"
                    else
                        # NOTE(review): this ends the whole script with
                        # status 0 although nothing was started.
                        echo No filesystem type defined!
                        exit 0
                    fi
                fi

                get_conf fs_opt "" "osd mount options $fs_type"
                if [ -z "$fs_opt" ]; then
                    if [ "$fs_type" = "btrfs" ]; then
                        #try to fallback to old keys
                        get_conf fs_opt "" "btrfs options"
                    fi

                    if [ -z "$fs_opt" ]; then
                        if [ "$fs_type" = "xfs" ]; then
                            fs_opt="rw,noatime,inode64"
                        else
                            #fallback to use at least noatime
                            fs_opt="rw,noatime"
                        fi
                    fi
                fi

                [ -n "$fs_opt" ] && fs_opt="-o $fs_opt"
                [ -n "$pre_mount" ] && do_cmd "$pre_mount"

                do_root_cmd_okfail "mkdir -p $fs_path"
                if [ "$fs_type" = "btrfs" ]; then
                    echo Mounting Btrfs on $host:$fs_path
                    do_root_cmd_okfail "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path ' /proc/mounts && umount $fs_path ; mount -t btrfs $fs_opt $first_dev $fs_path"
                else
                    echo Mounting $fs_type on $host:$fs_path
                    do_root_cmd_okfail "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path ' /proc/mounts && umount $fs_path ; mount -t $fs_type $fs_opt $first_dev $fs_path"
                fi
                if [ "$ERR" != "0" ]; then
                    EXIT_STATUS=$ERR
                    continue
                fi
            fi

            if [ "$type" = "osd" ]; then
                get_conf update_crush "" "osd crush update on start"
                # an unset value counts as enabled
                case "${update_crush:-1}" in 1|[Tt][Rr][Uu][Ee])
                    # update location in crush
                    get_conf osd_location_hook "$BINDIR/ceph-crush-location" "osd crush location hook"
                    if [ ! -e "$osd_location_hook" ]; then
                        EXIT_STATUS=2
                        continue
                    fi
                    osd_location=$($osd_location_hook --cluster $cluster --id $id --type osd)
                    get_conf osd_weight "" "osd crush initial weight"
                    # second column of `df -P -k` divided by 1024^3
                    defaultweight="$(df -P -k $osd_data/. | tail -1 | awk '{ print sprintf("%.4f",$2/1073741824) }')"
                    get_conf osd_keyring "$osd_data/keyring" "keyring"
                    do_cmd_okfail "timeout 30 $BINDIR/ceph -c $conf --name=osd.$id --keyring=$osd_keyring osd crush create-or-move -- $id ${osd_weight:-${defaultweight:-1}} $osd_location"
                    if [ "$ERR" != "0" ]; then
                        EXIT_STATUS=$ERR
                        continue
                    fi
                    ;;
                esac
            fi

            echo Starting Ceph $name on $host...
            if [ ! -d "$run_dir" ]; then
                # Create the directory that was actually tested for; with
                # the default run dir this is /var/run/ceph (assume
                # /var/run exists).
                install -d -m0770 -o ceph -g ceph "$run_dir"
            fi
            get_conf pre_start_eval "" "pre start eval"
            [ -n "$pre_start_eval" ] && $pre_start_eval
            get_conf pre_start "" "pre start command"
            get_conf post_start "" "post start command"
            [ -n "$pre_start" ] && do_cmd "$pre_start"
            do_cmd_okfail "$cmd" $runarg
            if [ "$ERR" != "0" ]; then
                EXIT_STATUS=$ERR
                continue
            fi

            [ -n "$post_start" ] && do_cmd "$post_start"
            # The former `[ "$?" -eq 0 ]` here only ever saw the status of
            # the -n test itself, so it was always true and is dropped.
            [ -n "$lockfile" ] && touch "$lockfile"
            ;;

        stop|onestop)
            get_conf pre_stop "" "pre stop command"
            get_conf post_stop "" "post stop command"
            [ -n "$pre_stop" ] && do_cmd "$pre_stop"
            stop_daemon $name ceph-$type $pid_file
            # Use this loop's own $pid_file instead of the $pidfile global
            # that stop_daemon happens to leave behind.
            [ -n "$pid_file" ] && rm -f "$pid_file"
            [ -n "$asok" ] && rm -f "$asok"
            [ -n "$post_stop" ] && do_cmd "$post_stop"
            [ -n "$lockfile" ] && rm -f "$lockfile"
            if [ $dofsumount -eq 1 ] && [ -n "$fs_devs" ]; then
                echo Unmounting OSD volume on $host:$fs_path
                do_root_cmd "umount $fs_path || true"
            fi
            ;;

        status)
            if daemon_is_running $name ceph-$type $id $pid_file; then
                printf '%s' "$name: running "
                do_cmd "$BINDIR/ceph daemon $name version 2>/dev/null" || printf unknown
                printf "\n"
            elif [ -e "$pid_file" ]; then
                # daemon is dead, but pid file still exists
                echo "$name: dead."
                EXIT_STATUS=1
            else
                # daemon is dead, and pid file is gone
                echo "$name: not running."
                EXIT_STATUS=3
            fi
            ;;

        ssh)
            # run whatever command $ssh holds (set outside this script)
            $ssh
            ;;

        forcestop)
            get_conf pre_forcestop "" "pre forcestop command"
            get_conf post_forcestop "" "post forcestop command"
            [ -n "$pre_forcestop" ] && do_cmd "$pre_forcestop"
            stop_daemon $name ceph-$type $pid_file -9
            [ -n "$post_forcestop" ] && do_cmd "$post_forcestop"
            [ -n "$lockfile" ] && rm -f "$lockfile"
            ;;

        killall)
            echo "killall ceph-$type on $host"
            do_cmd "pkill ^ceph-$type || true"
            [ -n "$lockfile" ] && rm -f "$lockfile"
            ;;

        force-reload | reload)
            signal_daemon $name ceph-$type $pid_file -1 "Reloading"
            ;;

        restart|onerestart)
            # re-run this script with the options that were parsed earlier
            $0 $options stop $name
            $0 $options start $name
            ;;

        condrestart)
            if daemon_is_running $name ceph-$type $id $pid_file; then
                $0 $options stop $name
                $0 $options start $name
            else
                echo "$name: not running."
            fi
            ;;

        cleanlogs)
            # NOTE(review): $log_dir is only looked up in the start branch
            # above, so unless it is set some other way nothing is removed
            # here.
            echo removing logs
            [ -n "$log_dir" ] && do_cmd "rm -f $log_dir/$type.$id.*"
            ;;

        cleanalllogs)
            # NOTE(review): same $log_dir caveat as for cleanlogs.
            echo removing all logs
            [ -n "$log_dir" ] && do_cmd "rm -f $log_dir/* || true"
            ;;

        *)
            usage_exit
            ;;
    esac
done
545
# activate latent osds?
# Only for "start", never when BINDIR is "." (in-tree run), and only when
# the remaining arguments are empty or consist of exactly "osd".
if [ "$command" = "start" ] && [ "$BINDIR" != "." ]; then
    if [ "$*" = "" ] || echo $* | grep -q ^osd\$ ; then
        # NOTE(review): the existence test looks in $SBINDIR, while the
        # command itself is resolved through PATH.
        [ -x $SBINDIR/ceph-disk ] && ceph-disk activate-all
    fi
fi

# Report the status collected while processing the daemons.
exit $EXIT_STATUS