3 # ceph-lazy : Be efficient, be lazy !
5 # Author: Gregory Charot <gcharot@redhat.com>
7 # This is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU Lesser General Public
9 # License as published by the Free Software Foundation; either
10 # version 2.1 of the License, or (at your option) any later version.
13 # Specify extra options for ceph, such as the username/keyring/etc. This can also be done with the CEPH_ARGS global variable
14 #CEPH_OPT="-n client.username"
18 # Print info message to stderr
22 printf "INFO: %s\n" "$*" >&2;
27 # Print error message to stderr
31 printf "ERROR: %s\n" "$*" >&2;
36 >&2 echo "Usage : ceph-lazy [-d | -h] [command] [parameters]
38 Ceph complex quering tool - Version $VERSION
42 -d Activate debug mode
50 host-get-osd hostname List all OSD IDs attached to a particular node.
51 host-get-nodes List all storage nodes.
52 host-osd-usage hostname Show total OSD space usage of a particular node (-d for details).
53 host-all-usage Show total OSD space usage of each nodes (-d for details)
57 pg-get-host pgid Find PG storage hosts (first is primary)
58 pg-most-write Find most written PG (nb operations)
59 pg-less-write Find less written PG (nb operations)
60 pg-most-write-kb Find most written PG (data written)
61 pg-less-write-kb Find less written PG (data written)
62 pg-most-read Find most read PG (nb operations)
63 pg-less-read Find less read PG (nb operations)
64 pg-most-read-kb Find most read PG (data read)
65 pg-less-read-kb Find less read PG (data read)
66 pg-empty Find empty PGs (no stored object)
70 rbd-prefix pool_name image_name Return RBD image prefix
71 rbd-count pool_name image_name Count number of objects in a RBD image
72 rbd-host pool_name image_name Find RBD primary storage hosts
73 rbd-osd pool_name image_name Find RBD primary OSDs
74 rbd-size pool_name image_name Print RBD image real size
75 rbd-all-size pool_name Print all RBD images size (Top first)
79 osd-most-used Show the most used OSD (capacity)
80 osd-less-used Show the less used OSD (capacity)
81 osd-get-ppg osd_id Show all primaries PGS hosted on a OSD
82 osd-get-pg osd_id Show all PGS hosted on a OSD
86 object-get-host pool_name object_id Find object storage hosts (first is primary)
#######################################
# Verify that every external tool is available and the cluster answers.
# Globals:   DEBUG (read), CEPH_OPT (read), CEPH (written), JQ (written)
# Arguments: none
# Outputs:   progress and error messages through echoinfo / echoerr
# Returns:   0 when all checks pass, 1 otherwise
#######################################
function check_requirements()
{
  # List of command dependencies
  local bin_dep="ceph rados rbd osdmaptool jq"
  local cmd

  # bin_dep is a plain space-separated list, so word splitting is intended.
  for cmd in $bin_dep; do
    [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Checking for $cmd..."
    # Resolve the name only; there is no need to execute each tool.
    command -v "$cmd" >/dev/null 2>&1 || { echoerr "$cmd cannot be found... Aborting."; return 1; }
  done

  # CEPH is expanded unquoted by its users so the extra options split into words.
  CEPH="ceph ${CEPH_OPT:-}"

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Checking Ceph connectivity & basic permissions..."

  # shellcheck disable=SC2086
  if ! $CEPH -s &> /dev/null; then
    echoerr "Cannot connect to cluster, please check your username & permissions"
    echoerr "Command $CEPH -s failed"
    return 1
  fi

  JQ="jq -M --raw-output"
  return 0
}
#######################################
# Print the OSDs and hosts that store a specific PG (first is primary)
# Globals:   CEPH, JQ, DEBUG (read)
# Arguments: $1 - placement group id
# Outputs:   one "OSD:osd.<id> | Host:<host>" line per entry of the PG's "up" list
# Exits:     1 when the argument count is wrong
#######################################
function find_host_from_pg() {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local PGID=$1
  local osd

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "PG $PGID has been found at (first is primary) : "

  # CEPH and JQ hold a command plus its options, hence the unquoted expansion.
  # The jq filter is quoted so the shell never treats it as a glob pattern.
  # shellcheck disable=SC2086
  for osd in $($CEPH pg "$PGID" query | $JQ -cr '.up[]'); do
    printf 'OSD:osd.%s | Host:' "$osd"
    $CEPH osd find "$osd" --format json 2> /dev/null | $JQ .crush_location.host
  done
}
#######################################
# Print the hosts that store a specific object (first is primary)
# Globals:   CEPH, JQ, DEBUG (read)
# Arguments: $1 - pool name
#            $2 - object id
# Outputs:   every line printed by find_host_from_pg, prefixed with "PG:<pgid> | "
# Exits:     1 when the argument count is wrong
#######################################
function find_host_from_object() {

  if [ $# -ne 2 ]; then
    echoerr "This command requires two arguments"
    exit 1
  fi

  local pool=$1
  local objid=$2
  local pgid host

  # Ask the cluster which PG the object maps to.
  # shellcheck disable=SC2086
  pgid=$($CEPH osd map "$pool" "$objid" --format json 2> /dev/null | $JQ -cr .pgid)

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "$objid found into PG $pgid"

  while IFS= read -r host; do
    echo "PG:$pgid | $host"
  done < <(find_host_from_pg "$pgid")
}
#######################################
# Print all primary PGs hosted by an OSD
# Globals:   CEPH, JQ, DEBUG (read)
# Arguments: $1 - OSD id, handed to jq as a JSON value via --argjson
# Outputs:   pgid of every dumped PG whose acting_primary equals the OSD id
# Exits:     1 when the argument count is wrong
#######################################
function find_prim_pg_from_osd() {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local posd=$1

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Looking for primary PGs beloging to OSD $posd"
  # shellcheck disable=SC2086
  $CEPH pg dump pgs --format json 2>/dev/null \
    | $JQ --argjson posd "$posd" '.[] | select(.acting_primary==$posd).pgid'
}
#######################################
# Print all PGs (primary & secondary) hosted by an OSD
# Globals:   CEPH, JQ, DEBUG (read)
# Arguments: $1 - OSD id, handed to jq as a JSON value via --argjson
# Outputs:   pgid of every dumped PG whose "up" list contains the OSD id
# Exits:     1 when the argument count is wrong
#######################################
function find_all_pg_from_osd() {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local osd=$1

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Looking for all PGs mapped to OSD $osd"
  # JQ already carries -M, so it is not repeated here.
  # shellcheck disable=SC2086
  $CEPH pg dump pgs --format json 2> /dev/null \
    | $JQ --argjson osd "$osd" '.[] | select(.up[]==$osd).pgid'
}
#######################################
# Check if a given image exists
# Arguments: $1 - pool name
#            $2 - RBD image name
# Outputs:   an error message through echoerr when "rbd info" fails
# Exits:     1 when the image cannot be found (callers do not test a status)
#######################################
function check_rbd_exists(){

  local pool=$1
  local rbd=$2

  if ! rbd info -p "$pool" "$rbd" &> /dev/null; then
    echoerr "Unable to find image $pool/$rbd"
    exit 1
  fi
}
#######################################
# Return RBD prefix from image name
# Arguments: $1 - pool name
#            $2 - RBD image name
# Outputs:   the image's block_name_prefix on stdout (callers capture it)
# Exits:     1 on wrong argument count or when no prefix can be read
#######################################
function get_rbd_prefix() {

  if [ $# -ne 2 ]; then
    echoerr "This command requires two arguments"
    exit 1
  fi

  local pool=$1
  local rbd=$2
  local prefix

  check_rbd_exists "$pool" "$rbd"

  prefix=$(rbd --image "$rbd" -p "$pool" info --format json 2> /dev/null | jq --raw-output .block_name_prefix)
  # jq prints the literal "null" when the key is absent; treat that as missing too.
  if [ -z "$prefix" ] || [ "$prefix" = "null" ]; then
    echoerr "Unable to find RBD Prefix for image $pool/$rbd"
    exit 1
  fi

  echo "$prefix"
}
243 # Count number of object in a RBD image
# NOTE(review): this listing is partial. The argument binding, the else/fi
# closers and whatever follows the last debug message are not visible here.
245 function count_rbd_object() {
# Exactly two arguments are required; $pool and $rbd used below are presumably
# bound from $1 and $2 in lines not shown (confirm).
247 if [ $# -eq 2 ]; then
251 echoerr "This command requires two arguments"
256 check_rbd_exists $pool $rbd
# Capture whatever get_rbd_prefix prints for this image.
258 local rbd_prefix=$(get_rbd_prefix $pool $rbd)
260 [ $DEBUG -eq 1 ] && echoinfo "RBD image $pool/$rbd has prefix $rbd_prefix; now couning objects..."
# Count the pool's objects whose name matches the prefix.
# NOTE(review): the prefix is used as an unquoted, unanchored grep pattern, so
# "." matches any character and an empty prefix would make grep fail.
262 local nb_obj=$(rados -p $pool ls | grep $rbd_prefix | wc -l)
264 [ $DEBUG -eq 1 ] && echoinfo "RBD image $pool/$rbd has $nb_obj objects"
270 # Find primary storage host for a given RBD image
# NOTE(review): this listing is partial. The argument binding, the
# else/fi/do/done closers and the error exits are not among the visible lines.
272 function find_prim_host_from_rbd() {
# Exactly two arguments are required; $pool and $rbd used below are presumably
# bound from $1 and $2 in lines not shown (confirm).
274 if [ $# -eq 2 ]; then
278 echoerr "This command requires two arguments"
283 check_rbd_exists $pool $rbd
# Scratch files: one receives the output of "osd getmap", the other the OSD
# tree as JSON.
286 local osdmap_t=$(mktemp)
287 local osdtree_t=$(mktemp)
288 # Get RBD image prefix
289 local rbd_prefix=$(get_rbd_prefix $pool $rbd)
290 # Exit if we received an empty prefix
# NOTE(review): exiting here skips the rm -f at the end of the function, so
# both scratch files stay behind unless a trap elsewhere removes them.
291 [ -z $rbd_prefix ] && exit 1
293 # Get pool ID from pool name
# NOTE(review): this call uses bare "ceph" rather than $CEPH, so the options
# from CEPH_OPT are not applied to it.
294 local pool_id=$(ceph osd lspools -f json | $JQ -M --arg pool $pool '.[]|select(.poolname==$pool).poolnum')
296 [ $DEBUG -eq 1 ] && echoinfo "RBD image $pool/$rbd has prefix $rbd_prefix; now finding primary host..."
298 [ $DEBUG -eq 1 ] && echoinfo "Dumping OSD map to $osdmap_t"
299 if ! $CEPH osd getmap > $osdmap_t 2> /dev/null; then
300 echoerr "Failed to retrieve OSD map"
304 [ $DEBUG -eq 1 ] && echoinfo "Dumping OSD tree to $osdtree_t"
306 if ! $CEPH osd tree --format json > $osdtree_t; then
307 echoerr "Failed to retrieve OSD tree"
311 [ $DEBUG -eq 1 ] && echoinfo "Looking for hosts..."
313 # For each object in the RBD image
314 for obj in $(rados -p $pool ls | grep $rbd_prefix);
316 # Map object to osd. osdmaptoot does not support json output so using dirty sed.
# The sed expression keeps the first number of the bracketed, comma-separated
# list in osdmaptool's output; grep -v then drops lines mentioning osdmaptool.
317 osd=$(osdmaptool --test-map-object $obj --pool $pool_id $osdmap_t 2>/dev/null | sed -r 's/.*\[([[:digit:]]+),.*/\1/' | grep -v osdmaptool)
# Print the name of every "host" node of the tree whose children include that OSD.
319 $JQ --argjson osd $osd '.nodes[] | select(.type=="host") | select(.children[] == $osd).name' $osdtree_t
# Remove both scratch files.
323 rm -f $osdtree_t $osdmap_t
328 # Find primary OSDs for a given RBD image
# NOTE(review): this listing is partial. The argument binding, the closers,
# the error exits and everything after the osd= assignment are not visible.
330 function find_prim_osd_from_rbd() {
# Exactly two arguments are required; $pool and $rbd used below are presumably
# bound from $1 and $2 in lines not shown (confirm).
332 if [ $# -eq 2 ]; then
336 echoerr "This command requires two arguments"
341 check_rbd_exists $pool $rbd
# NOTE(review): osdtree_t is created here, but no visible line writes to or
# reads from it in this function; confirm whether it is needed.
344 local osdmap_t=$(mktemp)
345 local osdtree_t=$(mktemp)
346 # Get RBD image prefix
347 local rbd_prefix=$(get_rbd_prefix $pool $rbd)
349 # Exit if we received an empty prefix
# NOTE(review): this exit happens after both mktemp calls; unless a trap
# elsewhere cleans up, the scratch files are left behind.
350 [ -z $rbd_prefix ] && exit 1
352 [ $DEBUG -eq 1 ] && echoinfo "RBD image $pool/$rbd has prefix $rbd_prefix; now finding primary OSDs..."
354 [ $DEBUG -eq 1 ] && echoinfo "Dumping OSD map to $osdmap_t"
355 if ! $CEPH osd getmap > $osdmap_t; then
356 echoerr "Failed to retrieve OSD map"
360 # For each object in the RBD image
361 for obj in $(rados -p $pool ls | grep $rbd_prefix);
363 # Map object to osd. osdmaptoot does not support json output so using dirty sed.
# NOTE(review): unlike find_prim_host_from_rbd, no --pool option is passed to
# osdmaptool here and no pool id is looked up; confirm this is intended.
364 osd=$(osdmaptool --test-map-object $obj $osdmap_t 2>/dev/null | sed -r 's/.*\[([[:digit:]]+),.*/\1/' | grep -v osdmaptool)
#######################################
# Print RBD image real size - Source http://ceph.com/planet/real-size-of-a-ceph-rbd-image/
# Globals:   DEBUG (read)
# Arguments: $1 - pool name
#            $2 - RBD image name
# Outputs:   "<size> MB", the sum of column 2 of "rbd diff" divided by 1024*1024
# Exits:     1 when the argument count is wrong
#######################################
function print_rbd_real_size {

  if [ $# -ne 2 ]; then
    echoerr "This command requires two arguments"
    exit 1
  fi

  local pool=$1
  local rbd=$2

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Checking if RBD image exists..."

  check_rbd_exists "$pool" "$rbd"

  rbd diff "$pool/$rbd" | awk '{ SUM += $2 } END { print SUM/1024/1024 " MB" }'
}
#######################################
# Print all RBD image real sizes - Top first
# Globals:   DEBUG (read)
# Arguments: $1 - pool name
# Outputs:   one "<size> MB - <image>" line per image listed by "rbd ls",
#            ordered by "sort -rV"
# Exits:     1 when the argument count is wrong
#######################################
function list_all_rbd_real_size {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local pool=$1
  local rbd

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Looking for RBD images in pool $pool"

  # read -r with an empty IFS keeps image names exactly as "rbd ls" printed them.
  while IFS= read -r rbd; do
    [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Inspecting image $rbd"
    rbd diff "$pool/$rbd" | awk -v rbd="$rbd" '{ SUM += $2 } END { print SUM/1024/1024 " MB - " rbd }'
  done < <(rbd -p "$pool" ls) | sort -rV
}
#######################################
# Print OSDs belonging to a particular storage host
# Globals:   CEPH, JQ (read)
# Arguments: $1 - host name as it appears in the OSD tree
# Outputs:   the children of the matching "host" node, ordered by "sort -V"
# Exits:     1 when the argument count is wrong
#######################################
function list_osd_from_host() {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local host=$1

  # shellcheck disable=SC2086
  $CEPH osd tree --format json-pretty 2> /dev/null \
    | $JQ --arg host "$host" '.nodes[] | select(.type=="host") | select(.name == $host).children[]' \
    | sort -V
}
#######################################
# List all storage nodes
# Globals:   CEPH, JQ (read)
# Arguments: none
# Outputs:   the name of every "host" node of the OSD tree, ordered by "sort -V"
#######################################
function list_all_nodes() {

  # JQ already carries -M --raw-output, so the flags are not repeated here.
  # shellcheck disable=SC2086
  $CEPH osd tree --format json \
    | $JQ '.nodes[] | select(.type=="host") | .name' \
    | sort -V
}
# Convert a KB figure to GB with one decimal, using bc as the rest of the file does.
function _kb_to_gb() {
  echo "scale=1;$1/1024/1024" | bc -l
}

#######################################
# Print Total OSD usage of a particular storage host
# Globals:   CEPH, JQ, DEBUG (read)
# Arguments: $1 - host name
# Outputs:   one summary line "Host:.. | OSDs:.. | Total_Size:..GB |
#            Total_Used:..GB | Total_Available:..GB"; per-OSD details go
#            through echoinfo when DEBUG is 1
# Exits:     1 on wrong argument count or when the PG map cannot be dumped
#######################################
function show_host_osd_usage() {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local host=$1
  local pgmap_t
  local osd
  local nb_osd=0
  local total_used_kb=0
  local osd_used_kb=0
  local total_available_kb=0
  local osd_available_kb=0
  local total_size_kb=0
  local osd_size_kb=0

  pgmap_t=$(mktemp) || { echoerr "Failed to create temporary file"; exit 1; }

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Dumping PG map..."
  # shellcheck disable=SC2086
  if ! $CEPH pg dump osds --format json 2>/dev/null > "$pgmap_t"; then
    echoerr "Failed to retrieve PG map"
    # Do not leave the scratch file behind on the failure path.
    rm -f -- "$pgmap_t"
    exit 1
  fi

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Looking for all OSDs on host $host..."

  # shellcheck disable=SC2086
  for osd in $(list_osd_from_host "$host"); do

    osd_used_kb=$($JQ --argjson osd "$osd" '.[] | select(.osd == $osd).kb_used' "$pgmap_t")
    osd_available_kb=$($JQ --argjson osd "$osd" '.[] | select(.osd == $osd).kb_avail' "$pgmap_t")
    osd_size_kb=$($JQ --argjson osd "$osd" '.[] | select(.osd == $osd).kb' "$pgmap_t")

    [ "${DEBUG:-0}" -eq 1 ] && echoinfo "OSD:$osd | Size:$(_kb_to_gb "$osd_size_kb")GB | Used:$(_kb_to_gb "$osd_used_kb")GB | Available:$(_kb_to_gb "$osd_available_kb")GB"

    # $(( )) treats an empty value as 0, where "let" would raise a syntax error.
    total_used_kb=$(( total_used_kb + osd_used_kb ))
    total_available_kb=$(( total_available_kb + osd_available_kb ))
    total_size_kb=$(( total_size_kb + osd_size_kb ))
    nb_osd=$(( nb_osd + 1 ))
  done

  rm -f -- "$pgmap_t"

  echo "Host:$host | OSDs:$nb_osd | Total_Size:$(_kb_to_gb "$total_size_kb")GB | Total_Used:$(_kb_to_gb "$total_used_kb")GB | Total_Available:$(_kb_to_gb "$total_available_kb")GB"
}
#######################################
# Print Total OSD usage of all nodes
# Globals:   DEBUG (read)
# Arguments: none
# Outputs:   whatever show_host_osd_usage prints for each name returned by
#            list_all_nodes
#######################################
function list_all_nodes_osd_usage() {

  local host

  # Node names come one per line; word splitting of the list is intended.
  for host in $(list_all_nodes); do

    [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Looking at node $host..."

    show_host_osd_usage "$host"
  done
}
#######################################
# Find most used (space) OSD
# Globals:   CEPH, JQ (read)
# Arguments: none
# Outputs:   "OSD:osd.<id> | host:<host>" for the entry with the highest kb_used
# Exits:     1 when no OSD id can be read from the dump
#######################################
function find_most_used_osd() {

  local osd host

  # shellcheck disable=SC2086
  osd=$($CEPH pg dump osds --format json 2> /dev/null | $JQ 'max_by(.kb_used) | .osd')
  # Without an id, "osd find" below would be called with no argument at all.
  if [ -z "$osd" ] || [ "$osd" = "null" ]; then
    echoerr "Failed to retrieve OSD statistics"
    exit 1
  fi

  # shellcheck disable=SC2086
  host=$($CEPH osd find "$osd" 2> /dev/null | $JQ .crush_location.host)

  echo "OSD:osd.${osd} | host:$host"
}
#######################################
# Find less used (space) OSD
# Globals:   CEPH, JQ (read)
# Arguments: none
# Outputs:   "OSD:osd.<id> | host:<host>" for the entry with the lowest kb_used
# Exits:     1 when no OSD id can be read from the dump
#######################################
function find_less_used_osd() {

  local osd host

  # shellcheck disable=SC2086
  osd=$($CEPH pg dump osds --format json 2> /dev/null | $JQ 'min_by(.kb_used) | .osd')
  # Without an id, "osd find" below would be called with no argument at all.
  if [ -z "$osd" ] || [ "$osd" = "null" ]; then
    echoerr "Failed to retrieve OSD statistics"
    exit 1
  fi

  # shellcheck disable=SC2086
  host=$($CEPH osd find "$osd" 2> /dev/null | $JQ .crush_location.host)

  echo "OSD:osd.${osd} | host:$host"
}
#######################################
# Run a jq query that selects one PG from the PG dump, then report its
# acting primary OSD and that OSD's host.
# Globals:   CEPH, JQ, DEBUG (read)
# Arguments: $1 - jq filter that yields a pgid, e.g. "max_by(.stat_sum.num_write).pgid"
# Outputs:   "PG:<pgid> | OSD:osd.<id> | Host:<host>"
# Exits:     1 on wrong argument count or when the PG map cannot be dumped
#######################################
function pg_stat_query() {

  if [ $# -ne 1 ]; then
    echoerr "This command requires one argument"
    exit 1
  fi

  local query_type=$1
  local pgmap_t pgid osd host

  pgmap_t=$(mktemp) || { echoerr "Failed to create temporary file"; exit 1; }

  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Dumping PG map..."
  # shellcheck disable=SC2086
  if ! $CEPH pg dump pgs --format json 2>/dev/null > "$pgmap_t"; then
    echoerr "Failed to retrieve PG map"
    # Do not leave the scratch file behind on the failure path.
    rm -f -- "$pgmap_t"
    exit 1
  fi

  # shellcheck disable=SC2086
  pgid=$($JQ --arg query_type "$query_type" "$query_type" "$pgmap_t")
  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Found PGID $pgid"

  # shellcheck disable=SC2086
  osd=$($JQ --arg pgid "$pgid" '.[] | select(.pgid == $pgid).acting_primary' "$pgmap_t")
  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Found OSD $osd"

  # The dump is not read again after this point.
  rm -f -- "$pgmap_t"

  # shellcheck disable=SC2086
  host=$($CEPH osd find "$osd" --format json 2> /dev/null | $JQ .crush_location.host)
  [ "${DEBUG:-0}" -eq 1 ] && echoinfo "Found host $host"

  echo "PG:$pgid | OSD:osd.$osd | Host:$host"
}
587 # Find empty pgs (no object stored)
# Takes no arguments. $CEPH and $JQ are expanded unquoted on purpose: each
# holds a command together with its options.
590 function find_empty_pg() {
# Dump all PGs as JSON (errors silenced) and print the pgid of every entry
# whose stat_sum.num_objects is 0.
592 $CEPH pg dump pgs --format json 2>/dev/null | $JQ '.[] | select(.stat_sum.num_objects == 0).pgid'
# Script entry point: handle the no-argument and -d cases, verify the
# requirements, then dispatch the requested command to its function.
# NOTE(review): this listing is partial. The case labels, the fi/esac closers
# and the bodies of the first two "if" statements are not fully visible, so the
# command name attached to each call below cannot be confirmed from here.
602 # Print help if no argument is given
603 if [ $# -eq 0 ]; then
608 # Activate debug mode if -d is specified as first parameter
609 if [ "$1" = "-d" ]; then
610 echoinfo "Debug mode activated"
618 # Check if all requirements are met
619 check_requirements || exit 1
622 # Call proper function
# NOTE(review): $2 and $3 are passed unquoted throughout, so an argument
# containing whitespace is split, and an empty one vanishes, before the callee
# counts its arguments.
629 list_osd_from_host $2
635 show_host_osd_usage $2
638 list_all_nodes_osd_usage
# The pg_stat_query argument is a jq filter that picks one PG from the dump.
644 pg_stat_query "max_by(.stat_sum.num_write).pgid"
647 pg_stat_query "min_by(.stat_sum.num_write).pgid"
650 pg_stat_query "max_by(.stat_sum.num_write_kb).pgid"
653 pg_stat_query "min_by(.stat_sum.num_write_kb).pgid"
656 pg_stat_query "max_by(.stat_sum.num_read).pgid"
659 pg_stat_query "min_by(.stat_sum.num_read).pgid"
662 pg_stat_query "max_by(.stat_sum.num_read_kb).pgid"
665 pg_stat_query "min_by(.stat_sum.num_read_kb).pgid"
671 count_rbd_object $2 $3
674 find_prim_host_from_rbd $2 $3
677 find_prim_osd_from_rbd $2 $3
680 print_rbd_real_size $2 $3
683 list_all_rbd_real_size $2
692 find_prim_pg_from_osd $2
695 find_all_pg_from_osd $2
701 find_host_from_object $2 $3
# Presumably the fall-through branch for an unrecognised command name (confirm).
704 echoerr "Unknown command : $1"