X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Ftest%2Ftest_lost.sh;fp=src%2Fceph%2Fsrc%2Ftest%2Ftest_lost.sh;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=4a976c03c3968e133040500f36a502e2632af3d7;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git

diff --git a/src/ceph/src/test/test_lost.sh b/src/ceph/src/test/test_lost.sh
deleted file mode 100755
index 4a976c0..0000000
--- a/src/ceph/src/test/test_lost.sh
+++ /dev/null
@@ -1,257 +0,0 @@
-#!/bin/bash -x
-
-#
-# Test the lost object logic
-#
-
-# Includes
-source "`dirname $0`/test_common.sh"
-
-TEST_POOL=rbd
-
-# Functions
-setup() {
-    export CEPH_NUM_OSD=$1
-    vstart_config=$2
-
-    # Start ceph
-    ./stop.sh
-
-    # Set the recovery delay to a very long time so that recovery does not
-    # start before the test asks for it.
-    ./vstart.sh -d -n -o "$vstart_config" || die "vstart failed"
-
-    # For existing pools, set size no greater than the number of OSDs,
-    # so that recovery from degraded pgs is possible.
-    local changed=0
-    for pool in `./ceph osd pool ls`; do
-        local size=`./ceph osd pool get ${pool} size | awk '{print $2}'`
-        if [ "${size}" -gt "${CEPH_NUM_OSD}" ]; then
-            ./ceph osd pool set ${pool} size ${CEPH_NUM_OSD}
-            changed=1
-        fi
-    done
-    if [ ${changed} -eq 1 ]; then
-        # XXX: When a pool has degraded pgs because its size exceeded the
-        # number of OSDs, recovery can still get stuck after the size is
-        # decreased and requires an additional kick.
-        ./ceph osd out 0
-        ./ceph osd in 0
-    fi
-
-    poll_cmd "./ceph health" HEALTH_OK 1 30
-}
-
-recovery1_impl() {
-    # Write lots and lots of objects
-    write_objects 1 1 200 4000 $TEST_POOL
-
-    # Take down osd1
-    stop_osd 1
-
-    # Continue writing a lot of objects
-    write_objects 2 2 200 4000 $TEST_POOL
-
-    # Bring up osd1
-    restart_osd 1
-
-    # Finish peering.
-    sleep 15
-
-    # Stop osd0.
-    # At this point we have peered, but *NOT* recovered.
-    # Objects should be lost.
-    stop_osd 0
-
-    poll_cmd "./ceph pg debug degraded_pgs_exist" TRUE 3 120
-    [ $? -eq 1 ] || die "Failed to see degraded PGs."
-    poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
-    [ $? -eq 1 ] || die "Failed to see unfound objects."
-    echo "Got unfound objects."
-
-    restart_osd 0
-    sleep 20
-
-    # Turn on recovery and wait for it to complete.
-    start_recovery 2
-
-    poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
-    [ $? -eq 1 ] || die "Failed to recover unfound objects."
-    poll_cmd "./ceph pg debug degraded_pgs_exist" FALSE 3 120
-    [ $? -eq 1 ] || die "Recovery never finished."
-}
-
-recovery1() {
-    setup 2 'osd recovery delay start = 10000'
-    recovery1_impl
-}
-
-lost1_impl() {
-    local flags="$@"
-    local lost_action=delete
-    local pgs_unfound pg
-
-    if is_set revert_lost $flags; then
-        lost_action=revert
-    fi
-
-    # Write lots and lots of objects
-    write_objects 1 1 20 8000 $TEST_POOL
-
-    # Take down osd1
-    stop_osd 1
-
-    # Continue writing a lot of objects
-    write_objects 2 2 20 8000 $TEST_POOL
-
-    # Bring up osd1
-    restart_osd 1
-
-    # Finish peering.
-    sleep 15
-
-    # Stop osd0.
-    # At this point we have peered, but *NOT* recovered.
-    # Objects should be lost.
-    stop_osd 0
-
-    # Since recovery can't proceed, stuff should be unfound.
-    poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
-    [ $? -eq 1 ] || die "Failed to see unfound objects."
-
-    pgs_unfound=`./ceph health detail | awk '$1 == "pg" && /[0-9] unfound$/ {print $2}'`
-
-    [ -n "$pgs_unfound" ] || die "no pg with unfound objects"
-
-    # While osd0 is merely down (not yet declared lost), marking its
-    # unfound objects lost must be refused.
-    for pg in $pgs_unfound; do
-        ./ceph pg $pg mark_unfound_lost revert &&
-            die "mark_unfound_lost unexpectedly succeeded for pg $pg"
-    done
-
-    if ! is_set mark_osd_lost $flags && ! is_set rm_osd $flags; then
-        return
-    fi
-
-    if is_set try_to_fetch_unfound $flags; then
-        # Ask for an object while it's still unfound, and
-        # verify we get woken to an error when it's declared lost.
-        echo "trying to get one of the unfound objects"
-        (
-            ./rados -c ./ceph.conf -p $TEST_POOL get obj02 $TEMPDIR/obj02 &&
-                die "expected radostool error"
-        ) &
-    fi
-
-    if is_set mark_osd_lost $flags; then
-        ./ceph osd lost 0 --yes-i-really-mean-it
-    fi
-
-    if is_set rm_osd $flags; then
-        ./ceph osd rm 0
-    fi
-
-    if ! is_set auto_mark_unfound_lost $flags; then
-        for pg in $pgs_unfound; do
-            ./ceph pg $pg mark_unfound_lost ${lost_action} ||
-                die "mark_unfound_lost failed for pg $pg"
-        done
-    fi
-
-    start_recovery 2
-
-    # Unfound objects go away and are turned into lost objects.
-    poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
-    [ $? -eq 1 ] || die "Unfound objects didn't go away."
-
-    # Every pg should now report that it has no unfound objects.
-    for pg in `./ceph pg ls | awk '/^[0-9]/ {print $1}'`; do
-        ./ceph pg $pg mark_unfound_lost revert 2>&1 |
-            grep 'pg has no unfound objects' ||
-            die "pg $pg has unfound objects"
-    done
-
-    # Reading from a lost object gives back an error code.
-    # TODO: check error code
-    ./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01
-    rc=$?
-    if [ "$lost_action" = delete -a $rc -eq 0 ]; then
-        die "expected radostool error"
-    elif [ "$lost_action" = revert -a $rc -ne 0 ]; then
-        die "unexpected radostool error"
-    fi
-
-    if is_set try_to_fetch_unfound $flags; then
-        echo "waiting for the try_to_fetch_unfound radostool instance to finish"
-        wait
-    fi
-}
-
-lost1() {
-    setup 2 'osd recovery delay start = 10000'
-    lost1_impl mark_osd_lost revert_lost
-}
-
-lost2() {
-    setup 2 'osd recovery delay start = 10000'
-    lost1_impl mark_osd_lost try_to_fetch_unfound
-}
-
-lost3() {
-    setup 2 'osd recovery delay start = 10000'
-    lost1_impl rm_osd
-}
-
-lost4() {
-    setup 2 'osd recovery delay start = 10000'
-    lost1_impl mark_osd_lost rm_osd
-}
-
-lost5() {
-    setup 2 'osd recovery delay start = 10000'
-    lost1_impl mark_osd_lost auto_mark_unfound_lost
-}
-
-all_osds_die_impl() {
-    poll_cmd "./ceph osd stat" '3 up, 3 in' 20 240
-    [ $? -eq 1 ] || die "didn't start 3 osds"
-
-    stop_osd 0
-    stop_osd 1
-    stop_osd 2
-
-    # Wait for the MOSDPGStat timeout.
-    poll_cmd "./ceph osd stat" '0 up' 20 240
-    [ $? -eq 1 ] || die "all osds weren't marked as down"
-}
-
-all_osds_die() {
-    setup 3 'osd mon report interval max = 60
-             osd mon report interval min = 3
-             mon osd report timeout = 60'
-
-    all_osds_die_impl
-}
-
-run() {
-    recovery1 || die "test failed"
-
-    lost1 || die "test failed"
-
-    # XXX: the try_to_fetch_unfound test currently hangs on "waiting for the
-    # try_to_fetch_unfound radostool instance to finish"
-    #lost2 || die "test failed"
-
-    lost3 || die "test failed"
-
-    lost4 || die "test failed"
-
-    # XXX: automatically marking lost is not implemented
-    #lost5 || die "test failed"
-
-    all_osds_die || die "test failed"
-}
-
-if [ $# -eq 0 ]; then
-    run
-    echo OK
-    exit 0
-fi
-
-"$@"
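A note on the recurring "[ $? -eq 1 ] || die" pattern in the deleted script: it only makes sense if poll_cmd, sourced from test_common.sh, returns 1 rather than 0 once the expected output appears. test_common.sh is not part of this diff, so the following is a minimal sketch of the semantics the call sites above appear to assume for poll_cmd and is_set; it is reconstructed from those call sites, not copied from the real helper file.

    # poll_cmd <command> <expected-substring> <interval> <timeout>
    # Assumed semantics: rerun <command> every <interval> seconds until its
    # output contains <expected-substring>. Return 1 on a match (the callers
    # above treat 1 as success) and 0 if <timeout> seconds pass without one.
    poll_cmd() {
        local command=$1 search=$2 interval=$3 timeout=$4 t=0
        while [ $t -lt $timeout ]; do
            $command | grep -q "$search" && return 1
            sleep $interval
            t=$((t + interval))
        done
        return 0
    }

    # is_set <flag> <flag-list...>
    # Assumed semantics: succeed iff <flag> occurs among the remaining
    # arguments, as in "is_set rm_osd $flags" inside lost1_impl.
    is_set() {
        local needle=$1 f
        shift
        for f in "$@"; do
            [ "$f" = "$needle" ] && return 0
        done
        return 1
    }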
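For reference, the dispatcher at the bottom of the script ran the whole suite when given no arguments, or a single scenario named on the command line. The working directory below is an assumption: the script invokes ./vstart.sh, ./stop.sh, ./ceph, and ./rados, so it expects to run from a built Ceph source directory that provides them.

    cd src                      # hypothetical vstart-capable build directory
    ./test/test_lost.sh         # run recovery1, lost1, lost3, lost4, all_osds_die
    ./test/test_lost.sh lost3   # run one scenario by function name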