+++ /dev/null
-#!/bin/bash
-# file: rbd-recover-tool
-#
-# Copyright (C) 2015 Ubuntu Kylin
-#
-# Author: Min Chen <minchen@ubuntukylin.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Library Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library Public License for more details.
-#
-
-# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
-# when ceph cluster is stopped.
-# it is a simple disater recovery policy, just for urgent condition
-
-my_dir=$(dirname "$0")
-
-. $my_dir/common_h
-. $my_dir/metadata_h
-. $my_dir/epoch_h
-. $my_dir/database_h
-
-#scp files from admin node to osd node
-file1=common_h
-file2=metadata_h
-file3=epoch_h
-file4=osd_job
-
-#------------ admin node's action -------------
-
-function scp_file()
-{
- local func="scp_file"
- file=$1
- if [ "$1"x = ""x ];then
- echo "$func: not file input"
- exit
- fi
- for host in `cat $osd_host`
- do
- {
- echo "$func: $host"
- scp $ssh_option $file $host:$job_path 1>/dev/null
- } &
- done
-}
-
-function scp_files()
-{
- local func="scp_files"
- for host in `cat $osd_host`
- do
- {
- echo "$func: $host"
- scp $ssh_option $file1 $host:$job_path
- scp $ssh_option $file2 $host:$job_path
- scp $ssh_option $file3 $host:$job_path
- scp $ssh_option $file4 $host:$job_path
- } &
- done
- wait
- echo "$func: finish"
-}
-
-function scatter_node_jobs()
-{
- local func="scatter_node_jobs"
- local host=
- local data_path=
- echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
-
- trap 'echo $func failed; exit' INT HUP
- while read line
- do
- {
- host=`echo $line|awk '{print $1}'`
- data_path=`echo $line|awk '{print $2}'`
- check_osd_process $host
-
- cmd="mkdir -p $job_path"
- ssh $ssh_option $host $cmd
- scp $ssh_option $file1 $host:$job_path >/dev/null
- scp $ssh_option $file2 $host:$job_path >/dev/null
- scp $ssh_option $file3 $host:$job_path >/dev/null
- scp $ssh_option $file4 $host:$job_path >/dev/null
-
- cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
- cmd="$cmd $job_path/osd_job do_omap_list $data_path;"
- cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
- cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
-
- ssh $ssh_option $host $cmd </dev/null
- } &
- done < $osd_host_path
- wait
- echo "$func: finish"
-}
-
-function gather_node_infos()
-{
- local func="gather_node_infos"
- echo "$func ..."
- >$pg_coll
- >$image_coll_v1
- >$image_coll_v2
- trap 'echo $func failed; exit' INT HUP
- while read line
- do
- {
- host=`echo $line|awk '{print $1}'`
- data_path=`echo $line|awk '{print $2}'`
- echo "$func: $host"
- check_osd_process $host
-
- #pg epoch
- cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
- ssh $ssh_option $host $cmd1 >> $pg_coll
- #image v1
- cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
- ssh $ssh_option $host $cmd2 >> $image_coll_v1
- #image v2
- cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
- ssh $ssh_option $host $cmd3 >> $image_coll_v2
- } &
- done < $osd_host_path
- wait
- echo "$func: finish"
-}
-
-function scatter_gather()
-{
- local func="scatter_gather"
- if [ ! -s $osd_host ];then
- echo "$func: no osd_host input"
- exit
- fi
- if [ ! -s $mon_host ];then
- echo "$func: no mon_host input"
- exit
- fi
- scatter_node_jobs
- gather_node_infos
-}
-
-
-#------------- operations --------------
-
-function database()
-{
- scatter_gather
- gen_database
-}
-
-function list()
-{
- list_images
-}
-
-function lookup()
-{
- lookup_image $1 $2 $3
-}
-
-function recover()
-{
- recover_image $1 $2 $3 $4
-}
-
-#------------- helper -------------
-
-function usage()
-{
- local cmd_name="rbd-recover-tool"
- echo
- echo "$cmd_name is used to recover rbd image of replicated pool,
- when all ceph services are stopped"
- echo "Usage:"
- echo "$cmd_name database
- gather pg info, object info, image metadata,
- and epoch info from all osd nodes,
- this will cosume a long time, just be patient,
- especially when scale up to 1000+ osds"
- echo "$cmd_name list
- list all rbd images of all replicated pools,
- before to lookup & recover"
- echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]]
- show image metadata: image format, rbd id, size, order, snapseq
- In addtion, for image with snapshots,
- this will list all snapshot infomations"
- echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
- all snapshots share one image head, to economize disk space
- so there is only one snapshot at any time,
- image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
- cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
- will show snapid
- recover to raw image/nosnap/head: <image_name>
- rollback to image head: <image_name>@
- rollback to image snap: <image_name>@<snap_name>
- recover steps:
- 1. recover image nosnap (only one time)
- 2. rollback to image snap"
-}
-
-function get_path()
-{
- local func="get_path"
- if [ $# -lt 1 ];then
- return
- fi
- if [[ $1 =~ // ]];then
- return # "/path//to" is invalid
- fi
- local parent=`dirname $1`
- local name=`basename $1`
- if [ "$parent"x = "/"x ];then
- echo "$parent$name"
- else
- echo -n "$parent/$name"
- fi
-}
-
-function admin_cmd()
-{
- local func="admin_cmd"
- if [ $# -lt 1 ];then
- usage
- exit
- fi
- if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
- usage
- exit
- fi
-
- if [ "$1"x = "database"x ];then
- if [ $# -gt 1 ];then
- usage
- exit
- fi
- # remove osd_host to refresh osd_host and osd_host_mapping
- rm -f $osd_host
- init_env_admin
- database
- elif [ "$1"x = "list"x ];then
- if [ $# -gt 1 ];then
- usage
- exit
- fi
- init_env_admin
- list
- elif [ "$1"x = "lookup"x ];then
- if [ $# -gt 2 ];then
- usage
- exit
- fi
- local pool_id=-1
- local image_name=
- local snap_name=
- if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
- pool_id="${BASH_REMATCH[1]}"
- image_name="${BASH_REMATCH[2]}"
- elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
- pool_id="${BASH_REMATCH[1]}"
- image_name="${BASH_REMATCH[2]}"
- snap_name="${BASH_REMATCH[3]}"
- else
- echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
- exit
- fi
- init_env_admin
- lookup $pool_id $image_name $snap_name
- elif [ "$1"x = "recover"x ];then
- if [ $# -lt 2 ] || [ $# -gt 3 ];then
- usage
- exit
- fi
- local pool_id=-1
- local image_name=
- local snap_name=@
- local image_dir=
- if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
- pool_id="${BASH_REMATCH[1]}"
- image_name="${BASH_REMATCH[2]}"
- elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
- pool_id="${BASH_REMATCH[1]}"
- image_name="${BASH_REMATCH[2]}"
- snap_name="${BASH_REMATCH[3]}"
- if [ "$snap_name"x = ""x ];then
- snap_name=@@
- fi
- else
- echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
- exit
- fi
- if [ $# = 3 ];then
- image_dir=`get_path $3`
- if [ "image_dir"x = ""x ];then
- echo "$3 invalid"
- exit
- fi
- fi
- init_env_admin
- recover $pool_id $image_name $snap_name $image_dir
- elif [ "$1"x = "scp_files"x ];then
- if [ $# -gt 1 ];then
- exit
- fi
- admin_parse_osd
- scp_files
- elif [ "$1"x = "scp_file"x ];then
- if [ $# -gt 2 ];then
- exit
- fi
- admin_parse_osd
- scp_file $2
- else
- echo "$func: $1: command not found"
- fi
-}
-
-admin_cmd $*