#!/bin/bash
# file: rbd-recover-tool
#
# Copyright (C) 2015 Ubuntu Kylin
#
# Author: Min Chen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library Public License for more details.
#
# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
# when ceph cluster is stopped.
# it is a simple disaster recovery policy, just for urgent condition
#
# Names used below but not defined in this file (osd_host, osd_host_path,
# mon_host, job_path, ssh_option, pg_coll, image_coll_v1, image_coll_v2,
# init_env_admin, admin_parse_osd, check_osd_process, gen_database,
# list_images, lookup_image, recover_image) are expected to come from the
# helper files sourced right below.

my_dir=$(dirname "$0")

. "$my_dir/common_h"
. "$my_dir/metadata_h"
. "$my_dir/epoch_h"
. "$my_dir/database_h"

#scp files from admin node to osd node
file1=common_h
file2=metadata_h
file3=epoch_h
file4=osd_job

#------------ admin node's action -------------

# Copy a single file into $job_path on every host listed in $osd_host.
# One background scp is started per host; the function returns once all
# of them have finished.
# Arguments: $1 - file to copy (exits with a message when empty)
function scp_file()
{
  local func="scp_file"
  local file=$1
  local host
  if [ "$file"x = ""x ]; then
    echo "$func: not file input"
    exit
  fi

  # $osd_host is deliberately word-split: one host per whitespace-separated word
  for host in $(cat "$osd_host"); do
  {
    echo "$func: $host"
    # $ssh_option is left unquoted so that it can carry several options
    scp $ssh_option "$file" "$host:$job_path" 1>/dev/null
  } &
  done

  # same barrier as scp_files: do not return while copies are in flight
  wait
}

# Copy the four helper files ($file1..$file4) into $job_path on every host
# listed in $osd_host, one background job per host, then wait for all jobs.
function scp_files()
{
  local func="scp_files"
  local host
  for host in $(cat "$osd_host"); do
  {
    echo "$func: $host"
    scp $ssh_option "$file1" "$host:$job_path"
    scp $ssh_option "$file2" "$host:$job_path"
    scp $ssh_option "$file3" "$host:$job_path"
    scp $ssh_option "$file4" "$host:$job_path"
  } &
  done

  wait
  echo "$func: finish"
}

# For every "<host> <data_path>" line of $osd_host_path, in parallel:
# create $job_path on the host, copy the helper files there, and run the
# osd_job steps flush_osd_journal, do_omap_list, do_pg_epoch and
# do_image_list against <data_path>.
function scatter_node_jobs()
{
  local func="scatter_node_jobs"
  local host=
  local data_path=
  local cmd=
  echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."

  trap 'echo $func failed; exit' INT HUP
  # first field is the host, second the osd data path, the rest is ignored
  while read -r host data_path _; do
  {
    check_osd_process "$host"

    cmd="mkdir -p $job_path"
    # stdin comes from /dev/null so ssh cannot eat the remaining host lines
    ssh $ssh_option "$host" "$cmd" </dev/null
    scp $ssh_option "$file1" "$host:$job_path" >/dev/null
    scp $ssh_option "$file2" "$host:$job_path" >/dev/null
    scp $ssh_option "$file3" "$host:$job_path" >/dev/null
    scp $ssh_option "$file4" "$host:$job_path" >/dev/null

    # every step is run through bash, so osd_job need not be executable
    cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
    cmd="$cmd bash $job_path/osd_job do_omap_list $data_path;"
    cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
    cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"

    ssh $ssh_option "$host" "$cmd" </dev/null
  } &
  done < "$osd_host_path"

  wait
  echo "$func: finish"
}

# Truncate $pg_coll, $image_coll_v1 and $image_coll_v2, then for every
# "<host> <data_path>" line of $osd_host_path append the output of the
# remote osd_job cat_pg_epoch / cat_image_v1 / cat_image_v2 steps to them.
# NOTE(review): the line carrying this function's header was damaged in the
# copy under review; name and preamble were rebuilt from the call in
# scatter_gather and from the surviving body - confirm against upstream.
function gather_node_infos()
{
  local func="gather_node_infos"
  local host=
  local data_path=
  local cmd1=
  local cmd2=
  local cmd3=

  # start from empty collection files
  : >"$pg_coll"
  : >"$image_coll_v1"
  : >"$image_coll_v2"

  trap 'echo $func failed; exit' INT HUP
  while read -r host data_path _; do
  {
    echo "$func: $host"
    check_osd_process "$host"

    #pg epoch
    cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
    ssh $ssh_option "$host" "$cmd1" </dev/null >> "$pg_coll"

    #image v1
    cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
    ssh $ssh_option "$host" "$cmd2" </dev/null >> "$image_coll_v1"

    #image v2
    cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
    ssh $ssh_option "$host" "$cmd3" </dev/null >> "$image_coll_v2"
  } &
  done < "$osd_host_path"

  wait
  echo "$func: finish"
}

# Run the scatter phase followed by the gather phase.
# Exits with a message when $osd_host or $mon_host is missing or empty.
function scatter_gather()
{
  local func="scatter_gather"
  if [ ! -s "$osd_host" ]; then
    echo "$func: no osd_host input"
    exit
  fi
  if [ ! -s "$mon_host" ]; then
    echo "$func: no mon_host input"
    exit
  fi
  scatter_node_jobs
  gather_node_infos
}

#------------- operations --------------

# "database" operation: collect infos from the osd nodes, then gen_database.
function database()
{
  scatter_gather
  gen_database
}

# "list" operation: delegate to list_images.
function list()
{
  list_images
}

# "lookup" operation: delegate to lookup_image.
# Arguments are forwarded unquoted on purpose: an empty optional argument
# (snap name) disappears instead of being passed as an empty word.
function lookup()
{
  lookup_image $1 $2 $3
}

# "recover" operation: delegate to recover_image.
# Arguments are forwarded unquoted on purpose (see lookup).
function recover()
{
  recover_image $1 $2 $3 $4
}

#------------- helper -------------

# Print the command line help.
# The <...> placeholders are named after the variables admin_cmd parses the
# arguments into.
# NOTE(review): placeholders and line layout of these messages were damaged
# in the copy under review and have been rebuilt - confirm against upstream.
function usage()
{
  local cmd_name="rbd-recover-tool"
  echo
  echo "$cmd_name is used to recover rbd image of replicated pool,
        when all ceph services are stopped"
  echo "Usage:"
  echo "$cmd_name database
                gather pg info, object info, image metadata,
                and epoch info from all osd nodes,
                this will cosume a long time, just be patient,
                especially when scale up to 1000+ osds"
  echo "$cmd_name list
                list all rbd images of all replicated pools,
                before to lookup & recover"
  echo "$cmd_name lookup  <pool_id>/<image_name>[@[<snap_name>]]
                show image metadata: image format, rbd id, size, order, snapseq
                In addtion, for image with snapshots,
                this will list all snapshot infomations"
  echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [<image_dir>]
                all snapshots share one image head, to economize disk space
                so there is only one snapshot at any time,
                image is saved at <image_dir>/pool_<pool_id>/image_name/image_name
                cat <image_dir>/pool_<pool_id>/image_name/@CURRENT,
                will show snapid
                recover to raw image/nosnap/head: <image_name>
                rollback to image head:           <image_name>@
                rollback to image snap:           <image_name>@<snap_name>
                recover steps:
                1. recover image nosnap (only one time)
                2. rollback to image snap"
}

# Print $1 rebuilt as "<dirname>/<basename>".
# Prints nothing when called without argument or when $1 contains "//".
# Arguments: $1 - path
# Outputs:   the rebuilt path on stdout (callers capture it with $(...))
function get_path()
{
  local func="get_path"
  if [ $# -lt 1 ]; then
    return
  fi
  if [[ $1 =~ // ]]; then
    return # "/path//to" is invalid
  fi
  local parent
  local name
  parent=$(dirname "$1")
  name=$(basename "$1")
  if [ "$parent"x = "/"x ]; then
    # parent already ends with the separator
    echo "$parent$name"
  else
    echo -n "$parent/$name"
  fi
}

# Command line dispatcher.
# Arguments: $1   - operation: database | list | lookup | recover |
#                   scp_files | scp_file | -h | --help
#            $2.. - operation specific, see usage
function admin_cmd()
{
  local func="admin_cmd"
  # image spec without / with the "@<snap_name>" suffix
  local spec_nosnap='^([^@/]+)/([^@/]+)$'
  local spec_snap='^([^@/]+)/([^@/]+)@([^@/]*)$'
  local pool_id=-1
  local image_name=
  local snap_name=
  local image_dir=

  if [ $# -lt 1 ]; then
    usage
    exit
  fi

  case "$1" in
    -h|--help)
      usage
      exit
      ;;

    database)
      if [ $# -gt 1 ]; then
        usage
        exit
      fi
      # remove osd_host to refresh osd_host and osd_host_mapping
      rm -f -- "$osd_host"
      init_env_admin
      database
      ;;

    list)
      if [ $# -gt 1 ]; then
        usage
        exit
      fi
      init_env_admin
      list
      ;;

    lookup)
      # exactly one image spec is required
      if [ $# -ne 2 ]; then
        usage
        exit
      fi
      if [[ $2 =~ $spec_nosnap ]]; then
        pool_id="${BASH_REMATCH[1]}"
        image_name="${BASH_REMATCH[2]}"
      elif [[ $2 =~ $spec_snap ]]; then
        pool_id="${BASH_REMATCH[1]}"
        image_name="${BASH_REMATCH[2]}"
        snap_name="${BASH_REMATCH[3]}"
      else
        echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
        exit
      fi
      init_env_admin
      # unquoted on purpose: an empty snap_name must not become an argument
      lookup $pool_id $image_name $snap_name
      ;;

    recover)
      if [ $# -lt 2 ] || [ $# -gt 3 ]; then
        usage
        exit
      fi
      # snap_name markers handed to recover: "@" when the spec has no '@'
      # part, "@@" when it ends with a bare '@', else the given snap name
      snap_name=@
      if [[ $2 =~ $spec_nosnap ]]; then
        pool_id="${BASH_REMATCH[1]}"
        image_name="${BASH_REMATCH[2]}"
      elif [[ $2 =~ $spec_snap ]]; then
        pool_id="${BASH_REMATCH[1]}"
        image_name="${BASH_REMATCH[2]}"
        snap_name="${BASH_REMATCH[3]}"
        if [ "$snap_name"x = ""x ]; then
          snap_name=@@
        fi
      else
        echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
        exit
      fi
      if [ $# -eq 3 ]; then
        image_dir=$(get_path "$3")
        # get_path prints nothing for a path it rejects
        if [ "$image_dir"x = ""x ]; then
          echo "$3 invalid"
          exit
        fi
      fi
      init_env_admin
      # unquoted on purpose: an empty image_dir must not become an argument
      recover $pool_id $image_name $snap_name $image_dir
      ;;

    scp_files)
      if [ $# -gt 1 ]; then
        exit
      fi
      admin_parse_osd
      scp_files
      ;;

    scp_file)
      if [ $# -gt 2 ]; then
        exit
      fi
      admin_parse_osd
      scp_file "$2"
      ;;

    *)
      echo "$func: $1: command not found"
      ;;
  esac
}

# "$@" keeps every command line argument intact as a separate word
admin_cmd "$@"