initial code repo
[stor4nfv.git] / src / ceph / src / test / test_unfound.sh
diff --git a/src/ceph/src/test/test_unfound.sh b/src/ceph/src/test/test_unfound.sh
new file mode 100755 (executable)
index 0000000..35e8483
--- /dev/null
@@ -0,0 +1,138 @@
+#!/bin/bash -x
+
+#
+# Creates some unfound objects and then tests finding them.
+#
+
+# Includes
+source "`dirname $0`/test_common.sh"
+
+TEST_POOL=rbd
+
+# Functions
+my_write_objects() {
+        write_objects $1 $2 10 1000000 $TEST_POOL
+}
+
+setup() {
+        export CEPH_NUM_OSD=$1
+
+        # Start ceph
+        ./stop.sh
+
+        # set recovery start to a really long time to ensure that we don't start recovery
+        ./vstart.sh -d -n -o 'osd recovery delay start = 10000
+osd max scrubs = 0' || die "vstart failed"
+}
+
+osd_resurrection_1_impl() {
+        # Write lots and lots of objects
+        my_write_objects 1 2
+
+        # Take down osd1
+        stop_osd 1
+
+        # Continue writing a lot of objects
+        my_write_objects 3 4
+
+        # Bring up osd1
+        restart_osd 1
+
+        # Finish peering.
+        sleep 15
+
+        # Stop osd0.
+        # At this point we have peered, but *NOT* recovered.
+        # Objects should be lost.
+        stop_osd 0
+
+       poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 3 120
+        [ $? -eq 1 ] || die "Failed to see unfound objects."
+        echo "Got unfound objects."
+
+        (
+                ./rados -c ./ceph.conf -p $TEST_POOL get obj01 $TEMPDIR/obj01 || die "radostool failed"
+        ) &
+        sleep 5
+        [ -e $TEMPDIR/obj01 ] && die "unexpected error: fetched unfound object?"
+
+        restart_osd 0
+
+       poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 3 120
+        [ $? -eq 1 ] || die "Failed to recover unfound objects."
+
+        wait
+        [ -e $TEMPDIR/obj01 ] || die "unexpected error: failed to fetched newly-found object"
+
+        # Turn off recovery delay start and verify that every osd gets copies
+        # of the correct objects.
+        echo "starting recovery..."
+        start_recovery 2
+
+        # success
+        return 0
+}
+
+osd_resurrection_1() {
+        setup 2
+        osd_resurrection_1_impl
+}
+
+stray_test_impl() {
+        stop_osd 0
+        # 0:stopped 1:active 2:active
+
+        my_write_objects 1 1
+
+        stop_osd 1
+        sleep 15
+        # 0:stopped 1:stopped(ver1) 2:active(ver1)
+
+        my_write_objects 2 2
+
+        restart_osd 1
+        sleep 15
+        # 0:stopped 1:active(ver1) 2:active(ver2)
+
+        stop_osd 2
+        sleep 15
+        # 0:stopped 1:active(ver1) 2:stopped(ver2)
+
+        restart_osd 0
+        sleep 15
+        # 0:active 1:active(ver1) 2:stopped(ver2)
+
+       poll_cmd "./ceph pg debug unfound_objects_exist" TRUE 5 300
+        [ $? -eq 1 ] || die "Failed to see unfound objects."
+
+        #
+        # Now, when we bring up osd2, it wil be considered a stray. However, it
+        # has the version that we need-- the very latest version of the
+        # objects.
+        #
+
+        restart_osd 2
+        sleep 15
+
+       poll_cmd "./ceph pg debug unfound_objects_exist" FALSE 4 240
+        [ $? -eq 1 ] || die "Failed to discover unfound objects."
+
+        echo "starting recovery..."
+        start_recovery 3
+
+        # success
+        return 0
+}
+
+stray_test() {
+        setup 3
+        stray_test_impl
+}
+
+run() {
+        osd_resurrection_1 || die "test failed"
+
+        stray_test || die "test failed"
+}
+
+$@