2 # file: rbd-recover-tool
4 # Copyright (C) 2015 Ubuntu Kylin
6 # Author: Min Chen <minchen@ubuntukylin.com>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU Library Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Library Public License for more details.
19 # rbd-recover-tool is an offline recovery tool for rbd images in replicated pools,
20 # for use when the ceph cluster is stopped.
21 # It is a simple disaster recovery policy, intended only for urgent conditions.
# Directory that holds this script, taken from the path it was invoked with ($0).
23 my_dir=$(dirname "$0")
30 #scp files from admin node to osd node
36 #------------ admin node's action -------------
# Empty-argument check: the trailing "x" on both sides keeps the comparison
# well-formed for `[` even when $1 is empty or unset.
42 if [ "$1"x = ""x ];then
43 echo "$func: not file input"
# Word-split the contents of the file named by $osd_host; each word is used
# as an scp target host below.
46 for host in `cat $osd_host`
# Copy $file into $job_path on the host. Only scp's stdout is discarded,
# so error output stays visible.
50 scp $ssh_option $file $host:$job_path 1>/dev/null
# Record this function's name in a local variable.
57 local func="scp_files"
# For every word read from the file named by $osd_host, copy the four files
# $file1..$file4 into $job_path on that host (scp output is not suppressed here).
58 for host in `cat $osd_host`
62 scp $ssh_option $file1 $host:$job_path
63 scp $ssh_option $file2 $host:$job_path
64 scp $ssh_option $file3 $host:$job_path
65 scp $ssh_option $file4 $host:$job_path
# Distribute the job files to every OSD host listed in $osd_host_path and run
# the per-OSD preparation steps there: journal flush, omap list, pg epoch and
# image list, all through the remote copy of osd_job.
72 function scatter_node_jobs()
74 local func="scatter_node_jobs"
77 echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
# Report failure and exit if the run is interrupted (INT) or hung up (HUP).
79 trap 'echo $func failed; exit' INT HUP
# Each input line carries the host in field 1 and the OSD data path in field 2.
83 host=`echo $line|awk '{print $1}'`
84 data_path=`echo $line|awk '{print $2}'`
85 check_osd_process $host
# Make sure the job directory exists on the host before copying into it.
87 cmd="mkdir -p $job_path"
88 ssh $ssh_option $host $cmd
89 scp $ssh_option $file1 $host:$job_path >/dev/null
90 scp $ssh_option $file2 $host:$job_path >/dev/null
91 scp $ssh_option $file3 $host:$job_path >/dev/null
92 scp $ssh_option $file4 $host:$job_path >/dev/null
# Build one remote command line that runs the four osd_job steps in order.
# Every step goes through `bash` so none of them depends on the copied
# osd_job file carrying an execute bit.
94 cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
95 cmd="$cmd bash $job_path/osd_job do_omap_list $data_path;"
96 cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
97 cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
# stdin comes from /dev/null, presumably so ssh cannot consume the remaining
# lines of $osd_host_path that feed the enclosing loop.
99 ssh $ssh_option $host $cmd </dev/null
101 done < $osd_host_path
# Collect per-OSD results: for each line of $osd_host_path, run three osd_job
# "cat_*" sub-commands on the host over ssh and append their output to the
# local files named by $pg_coll, $image_coll_v1 and $image_coll_v2.
106 function gather_node_infos()
108 local func="gather_node_infos"
# Report failure and exit if the run is interrupted (INT) or hung up (HUP).
113 trap 'echo $func failed; exit' INT HUP
# Each input line carries the host in field 1 and the OSD data path in field 2.
117 host=`echo $line|awk '{print $1}'`
118 data_path=`echo $line|awk '{print $2}'`
120 check_osd_process $host
# NOTE(review): unlike the final ssh in scatter_node_jobs, the ssh calls below
# do not redirect stdin. If the loop reads $osd_host_path on stdin, ssh could
# consume the remaining lines -- confirm $ssh_option or the loop header prevents this.
# pg epoch information
123 cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
124 ssh $ssh_option $host $cmd1 >> $pg_coll
# image metadata reported by cat_image_v1
126 cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
127 ssh $ssh_option $host $cmd2 >> $image_coll_v1
# image metadata reported by cat_image_v2
129 cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
130 ssh $ssh_option $host $cmd3 >> $image_coll_v2
132 done < $osd_host_path
# Validate the host-list inputs before the scatter/gather work.
137 function scatter_gather()
139 local func="scatter_gather"
# `-s` is true only for an existing, non-empty file, so a missing or empty
# $osd_host file is reported here.
140 if [ ! -s $osd_host ];then
141 echo "$func: no osd_host input"
# Same check for the $mon_host file.
144 if [ ! -s $mon_host ];then
145 echo "$func: no mon_host input"
153 #------------- operations --------------
# Forward the first three positional arguments, unquoted, to lookup_image.
168 lookup_image $1 $2 $3
# Forward the first four positional arguments, unquoted, to recover_image.
173 recover_image $1 $2 $3 $4
176 #------------- helper -------------
# Help text: echoes one multi-line description per sub-command (database,
# lookup, recover, ...), each prefixed with the tool name held in $cmd_name.
180 local cmd_name="rbd-recover-tool"
182 echo "$cmd_name is used to recover rbd image of replicated pool,
183 when all ceph services are stopped"
185 echo "$cmd_name database
186 gather pg info, object info, image metadata,
187 and epoch info from all osd nodes,
188 this will cosume a long time, just be patient,
189 especially when scale up to 1000+ osds"
191 list all rbd images of all replicated pools,
192 before to lookup & recover"
193 echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]]
194 show image metadata: image format, rbd id, size, order, snapseq
195 In addtion, for image with snapshots,
196 this will list all snapshot infomations"
197 echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
198 all snapshots share one image head, to economize disk space
199 so there is only one snapshot at any time,
200 image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
201 cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
203 recover to raw image/nosnap/head: <image_name>
204 rollback to image head: <image_name>@
205 rollback to image snap: <image_name>@<snap_name>
207 1. recover image nosnap (only one time)
208 2. rollback to image snap"
# Path helper: prints a path rebuilt from the parent directory and final
# component of $1, or prints nothing when the path is rejected.
213 local func="get_path"
# A doubled slash anywhere in $1 makes the path invalid: return without output.
217 if [[ $1 =~ // ]];then
218 return # "/path//to" is invalid
# Split $1 into its parent directory and its final component.
220 local parent=`dirname $1`
221 local name=`basename $1`
# A parent of "/" is handled as a special case.
222 if [ "$parent"x = "/"x ];then
# Emit "<parent>/<name>" with no trailing newline so callers can capture it.
225 echo -n "$parent/$name"
# Command dispatcher: selects the action from the sub-command in $1
# (-h/--help, database, list, lookup, recover, scp_files, scp_file) and
# reports anything else as "command not found".
231 local func="admin_cmd"
236 if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
241 if [ "$1"x = "database"x ];then
246 # remove osd_host to refresh osd_host and osd_host_mapping
250 elif [ "$1"x = "list"x ];then
257 elif [ "$1"x = "lookup"x ];then
# Image spec without a snapshot part: <pool_id>/<image_name>
265 if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
266 pool_id="${BASH_REMATCH[1]}"
267 image_name="${BASH_REMATCH[2]}"
# Image spec with a snapshot part: <pool_id>/<image_name>@<snap_name>.
# The snapshot name may be empty, i.e. a trailing "@".
268 elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
269 pool_id="${BASH_REMATCH[1]}"
270 image_name="${BASH_REMATCH[2]}"
271 snap_name="${BASH_REMATCH[3]}"
273 echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
277 lookup $pool_id $image_name $snap_name
278 elif [ "$1"x = "recover"x ];then
# recover takes the image spec plus an optional output directory.
279 if [ $# -lt 2 ] || [ $# -gt 3 ];then
# Same two image-spec forms as for lookup.
287 if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
288 pool_id="${BASH_REMATCH[1]}"
289 image_name="${BASH_REMATCH[2]}"
290 elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
291 pool_id="${BASH_REMATCH[1]}"
292 image_name="${BASH_REMATCH[2]}"
293 snap_name="${BASH_REMATCH[3]}"
# An empty snapshot name ("<image_name>@") is handled separately.
294 if [ "$snap_name"x = ""x ];then
298 echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
# get_path prints nothing for a path it rejects (e.g. one containing "//"),
# so an empty $image_dir means the supplied directory is unusable. The test
# must expand the variable; a literal "image_dir" would never equal "".
302 image_dir=`get_path $3`
303 if [ "$image_dir"x = ""x ];then
309 recover $pool_id $image_name $snap_name $image_dir
310 elif [ "$1"x = "scp_files"x ];then
316 elif [ "$1"x = "scp_file"x ];then
323 echo "$func: $1: command not found"