src/ceph/src/test/test_unfound.sh

   1 #!/bin/bash -x
   2
   3 #
   4 # Creates some unfound objects and then tests finding them.
   5 #
   6
   7 # Includes
   # Load the shared test helpers from the directory this script lives in.
   # Both the command substitution and $0 are quoted so that a script path
   # containing whitespace or glob characters still resolves to one file name.
   source "$(dirname "$0")/test_common.sh"

   # Pool that the object writes and the rados fetch in this file operate on.
   TEST_POOL=rbd
  11
  12 # Functions
  # Write a batch of test objects into $TEST_POOL through write_objects
  # (a helper that is not defined in this file).
  # Globals:   TEST_POOL (read)
  # Arguments: $1, $2 - forwarded unchanged as the first two arguments of
  #            write_objects (presumably the first and last object version
  #            to write -- confirm against test_common.sh).
  # The literals 10 and 1000000 are always passed as the third and fourth
  # arguments (presumably object count and object size -- TODO confirm).
  # Every expansion is quoted so each value reaches write_objects as exactly
  # one argument, even if it contains whitespace or glob characters.
  my_write_objects() {
          write_objects "$1" "$2" 10 1000000 "$TEST_POOL"
  }
  16
  # Bring up a fresh vstart cluster for one test scenario.
  # Globals:   CEPH_NUM_OSD (written and exported)
  # Arguments: $1 - value exported as CEPH_NUM_OSD (the callers in this file
  #            pass the number of OSDs they want)
  # Calls die "vstart failed" when ./vstart.sh exits non-zero.
  setup() {
          # Two settings handed to vstart.sh via -o, separated by a newline.
          # The recovery start delay is set to a very long time so that
          # recovery does not begin on its own during the test.
          local vstart_overrides=$'osd recovery delay start = 10000\nosd max scrubs = 0'

          CEPH_NUM_OSD=$1
          export CEPH_NUM_OSD

          # Stop whatever is still running before starting again; the exit
          # status of stop.sh is deliberately not checked.
          ./stop.sh

          if ! ./vstart.sh -d -n -o "$vstart_overrides"; then
                  die "vstart failed"
          fi
  }
  27
  # Scenario body for osd_resurrection_1 (which starts a two-OSD cluster first).
  #
  # Writes objects, takes osd1 down, writes more, brings osd1 back, then stops
  # osd0 before recovery has run, so the newer objects become unfound. Checks
  # that a read of obj01 does not complete while it is unfound, that the
  # objects are found again once osd0 is back, and that the read then finishes.
  #
  # Globals:   TEST_POOL (read), TEMPDIR (read; directory receiving obj01)
  # Arguments: none
  # Returns:   0 on success; every failed check calls die.
  #
  # poll_cmd, stop_osd, restart_osd, start_recovery and die are not defined in
  # this file. The checks below treat exit status 1 from poll_cmd as "the
  # expected output was seen".
  osd_resurrection_1_impl() {
          # Write lots and lots of objects
          my_write_objects 1 2

          # Take down osd1
          stop_osd 1

          # Continue writing a lot of objects
          my_write_objects 3 4

          # Bring up osd1
          restart_osd 1

          # Finish peering.
          sleep 15

          # Stop osd0.
          # At this point we have peered, but *NOT* recovered.
          # Objects should be lost.
          stop_osd 0

          poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
          [ $? -eq 1 ] || die "Failed to see unfound objects."
          echo "Got unfound objects."

          # Start the read in the background: it is expected not to complete
          # while the object is unfound. The paths are quoted so a TEMPDIR or
          # pool name containing whitespace is passed as a single argument.
          (
                  ./rados -c ./ceph.conf -p "$TEST_POOL" get obj01 "$TEMPDIR/obj01" || die "radostool failed"
          ) &
          sleep 5
          # Quoted so that [ always receives exactly one operand for -e;
          # unquoted, a path with whitespace makes the test itself error out
          # and the check silently never fires.
          [ -e "$TEMPDIR/obj01" ] && die "unexpected error: fetched unfound object?"

          restart_osd 0

          poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
          [ $? -eq 1 ] || die "Failed to recover unfound objects."

          # Let the background read finish. The status of wait is not checked;
          # success is judged by the fetched file existing.
          wait
          [ -e "$TEMPDIR/obj01" ] || die "unexpected error: failed to fetched newly-found object"

          # Turn off recovery delay start and verify that every osd gets copies
          # of the correct objects.
          echo "starting recovery..."
          start_recovery 2

          # success
          return 0
  }
  75
  # Test case: start a two-OSD cluster, then run the OSD resurrection scenario.
  # Returns the exit status of osd_resurrection_1_impl; the status of setup
  # is not examined here.
  osd_resurrection_1() {
          local resurrection_osds=2

          setup "$resurrection_osds"
          osd_resurrection_1_impl
  }
  80
  # Scenario body for stray_test (which starts a three-OSD cluster first).
  #
  # Stops and restarts the OSDs in an order that leaves the newest object
  # versions only on osd2 while osd2 is down, checks that unfound objects are
  # reported, then brings osd2 back and checks that they are found again.
  #
  # Arguments: none
  # Returns:   0 on success; every failed check calls die.
  #
  # The checks treat exit status 1 from poll_cmd as "the expected output was
  # seen". The state comments use the form <osd>:<state>(<newest version held>).
  stray_test_impl() {
          local unfound_query="./ceph pg debug unfound_objects_exist"
          local settle_secs=15
          local poll_rc

          stop_osd 0
          # 0:stopped 1:active 2:active

          my_write_objects 1 1

          stop_osd 1
          sleep "$settle_secs"
          # 0:stopped 1:stopped(ver1) 2:active(ver1)

          my_write_objects 2 2

          restart_osd 1
          sleep "$settle_secs"
          # 0:stopped 1:active(ver1) 2:active(ver2)

          stop_osd 2
          sleep "$settle_secs"
          # 0:stopped 1:active(ver1) 2:stopped(ver2)

          restart_osd 0
          sleep "$settle_secs"
          # 0:active 1:active(ver1) 2:stopped(ver2)

          # Only the stopped osd2 holds the newest version, so unfound objects
          # are expected to be reported now.
          poll_cmd "$unfound_query" TRUE 5 300
          poll_rc=$?
          if [ "$poll_rc" -ne 1 ]; then
                  die "Failed to see unfound objects."
          fi

          # When osd2 comes back it will be considered a stray, yet it is the
          # one holding what we need: the very latest version of the objects.
          restart_osd 2
          sleep "$settle_secs"

          poll_cmd "$unfound_query" FALSE 4 240
          poll_rc=$?
          if [ "$poll_rc" -ne 1 ]; then
                  die "Failed to discover unfound objects."
          fi

          echo "starting recovery..."
          start_recovery 3

          # Reaching this point means every check passed.
          return 0
  }
 126
 # Test case: start a three-OSD cluster, then run the stray-OSD scenario.
 # Returns the exit status of stray_test_impl; the status of setup is not
 # examined here.
 stray_test() {
         local stray_osds=3

         setup "$stray_osds"
         stray_test_impl
 }
 131
 # Run every test case in this file, in order: osd_resurrection_1, then
 # stray_test. A case that returns non-zero is reported through
 # die "test failed".
 run() {
         local unfound_case

         for unfound_case in osd_resurrection_1 stray_test; do
                 "$unfound_case" || die "test failed"
         done
 }
 137
 # Dispatch: execute the script's arguments as a command, so that e.g.
 # `test_unfound.sh run` calls run(). Quoted so each argument is passed on
 # exactly as given instead of being re-split and glob-expanded; with no
 # arguments this expands to nothing and the script does nothing further.
 "$@"