Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / qa / workunits / rbd / rbd_mirror_ha.sh
1 #!/bin/sh
2 #
3 # rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode
4 #
5
6 . $(dirname $0)/rbd_mirror_helpers.sh
7
# Check whether an rbd-mirror daemon instance reports itself as leader.
#
# Globals:   CLUSTER1, CLUSTER2 (read); POOL (read, used as the default pool)
# Arguments: $1 - daemon instance id; addressed as "${CLUSTER1}:<id>"
#            $2 - pool to query (optional; POOL is used when missing or empty)
# Outputs:   the status line(s) matching '"leader": true', as printed by grep
# Returns:   grep's exit status: 0 when a '"leader": true' line is present in
#            the admin_daemon output, non-zero otherwise
is_leader()
{
    local instance=$1
    local pool="${2:-${POOL}}"

    # Every expansion is quoted so that pool/cluster names reach admin_daemon
    # verbatim (no word splitting, no pathname expansion).
    admin_daemon "${CLUSTER1}:${instance}" \
                 rbd mirror status "${pool}" "${CLUSTER2}" |
        grep '"leader": true'
}
19
# Poll daemon instances 0..9 until one of them reports itself as leader.
#
# Before each polling round the function sleeps for the next delay in a fixed
# back-off schedule (16 rounds in total).
#
# Globals:   LEADER (written) - id of the instance found to be leader, or
#            emptied when no leader was found within the schedule
# Returns:   0 once a leader is found, 1 after the schedule is exhausted
wait_for_leader()
{
    local delay candidate

    for delay in 1 1 2 4 4 4 4 4 8 8 8 8 16 16 32 64; do
        sleep ${delay}
        for candidate in 0 1 2 3 4 5 6 7 8 9; do
            if is_leader ${candidate}; then
                LEADER=${candidate}
                return 0
            fi
        done
    done

    # Nobody claimed leadership: clear any stale value before failing.
    LEADER=
    return 1
}
36
# Ask the current leader daemon to release leadership.
#
# Globals:   CLUSTER1, CLUSTER2, LEADER (read)
# Arguments: $1 - pool (optional). When non-empty, the pool and CLUSTER2 are
#                 appended to the "rbd mirror leader release" command.
# Returns:   the exit status of admin_daemon
release_leader()
{
    local pool=$1

    # Build the argument list in the positional parameters rather than in a
    # string that is re-split on expansion; this keeps each argument intact
    # and stays within POSIX sh (no arrays).
    set -- rbd mirror leader release
    if [ -n "${pool}" ]; then
        set -- "$@" "${pool}" "${CLUSTER2}"
    fi

    admin_daemon "${CLUSTER1}:${LEADER}" "$@"
}
46
# Wait until the instance recorded in LEADER no longer reports itself leader.
#
# Polls is_leader up to 10 times, sleeping one second after each poll that
# still shows the instance as leader.
#
# Globals:   LEADER (read) - must be non-empty
# Returns:   0 as soon as is_leader fails for LEADER,
#            1 if LEADER is empty or the instance is still leader after
#            all polls
wait_for_leader_released()
{
    local i

    # Fail explicitly instead of relying on errexit: with an empty LEADER the
    # is_leader probe below would fail and be misread as "released".
    if [ -z "${LEADER}" ]; then
        echo "wait_for_leader_released: LEADER is not set" >&2
        return 1
    fi

    for i in 1 2 3 4 5 6 7 8 9 10; do
        is_leader "${LEADER}" || return 0
        sleep 1
    done

    return 1
}
59
# Run the replay check sequence for each image given as an argument.
#
# For every image: wait for replay to start on the leader, write to the image
# on CLUSTER2, wait for replay to complete, check the reported mirror status
# and finally compare the images.
#
# Globals:   CLUSTER1, CLUSTER2, POOL, LEADER (read)
#            RBD_MIRROR_USE_RBD_MIRROR (read) - when empty, the CLUSTER2 side
#            is additionally expected to report 'down+unknown'
# Arguments: $@ - image names (none is allowed; the loop is then skipped)
# Returns:   status of the last command run (0 when no images are given)
test_replay()
{
    local image

    # Expansions are quoted so that cluster specs, pool and image names are
    # handed to the helpers exactly as given.
    for image in "$@"; do
        wait_for_image_replay_started "${CLUSTER1}:${LEADER}" "${POOL}" "${image}"
        write_image "${CLUSTER2}" "${POOL}" "${image}" 100
        wait_for_replay_complete "${CLUSTER1}:${LEADER}" "${CLUSTER2}" "${POOL}" \
                                 "${image}"
        wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${image}" 'up+replaying' \
                                    'master_position'
        if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
            wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${image}" \
                                        'down+unknown'
        fi
        compare_images "${POOL}" "${image}"
    done
}
78
# ---------------------------------------------------------------------------
# Test scenario.
#
# The steps below run strictly in order and share state through LEADER,
# prev_leader and the imageN variables.  Bare checks such as `is_leader 0 ...`
# and `test "${LEADER}" = 0` only abort the run if errexit is active --
# presumably enabled by the sourced helpers; verify against
# rbd_mirror_helpers.sh.
# ---------------------------------------------------------------------------

testlog "TEST: start first daemon instance and test replay"
start_mirror "${CLUSTER1}:0"
image1=test1
create_image "${CLUSTER2}" "${POOL}" "${image1}"
# Only instance 0 is running, so it is taken to be the leader.
LEADER=0
test_replay "${image1}"

testlog "TEST: release leader and wait it is reacquired"
is_leader 0 "${POOL}"
is_leader 0 "${PARENT_POOL}"
# Release leadership for POOL only; PARENT_POOL must still be led by 0.
release_leader "${POOL}"
wait_for_leader_released
is_leader 0 "${PARENT_POOL}"
wait_for_leader
# Release without a pool argument; afterwards instance 0 is expected not to
# be leader for PARENT_POOL either.
release_leader
wait_for_leader_released
expect_failure "" is_leader 0 "${PARENT_POOL}"
wait_for_leader

testlog "TEST: start second daemon instance and test replay"
start_mirror "${CLUSTER1}:1"
image2=test2
create_image "${CLUSTER2}" "${POOL}" "${image2}"
test_replay "${image1}" "${image2}"

testlog "TEST: release leader and test it is acquired by secondary"
is_leader 0 "${POOL}"
is_leader 0 "${PARENT_POOL}"
release_leader "${POOL}"
wait_for_leader_released
wait_for_leader
test_replay "${image1}" "${image2}"
release_leader
wait_for_leader_released
wait_for_leader
# After the second release leadership is expected back on instance 0.
test "${LEADER}" = 0

testlog "TEST: stop first daemon instance and test replay"
stop_mirror "${CLUSTER1}:0"
image3=test3
create_image "${CLUSTER2}" "${POOL}" "${image3}"
# Instance 1 is the only one left running.
LEADER=1
test_replay "${image1}" "${image2}" "${image3}"

testlog "TEST: start first daemon instance and test replay"
start_mirror "${CLUSTER1}:0"
image4=test4
create_image "${CLUSTER2}" "${POOL}" "${image4}"
test_replay "${image3}" "${image4}"

testlog "TEST: crash leader and test replay"
stop_mirror "${CLUSTER1}:1" -KILL
image5=test5
create_image "${CLUSTER2}" "${POOL}" "${image5}"
# With instance 1 killed, instance 0 is the only one left running.
LEADER=0
test_replay "${image1}" "${image4}" "${image5}"

testlog "TEST: start crashed leader and test replay"
start_mirror "${CLUSTER1}:1"
image6=test6
create_image "${CLUSTER2}" "${POOL}" "${image6}"
test_replay "${image1}" "${image6}"

testlog "TEST: start yet another daemon instance and test replay"
start_mirror "${CLUSTER1}:2"
image7=test7
create_image "${CLUSTER2}" "${POOL}" "${image7}"
test_replay "${image1}" "${image7}"

testlog "TEST: release leader and test it is acquired by secondary"
is_leader 0
release_leader
wait_for_leader_released
wait_for_leader
test_replay "${image1}" "${image2}"

testlog "TEST: stop leader and test replay"
stop_mirror "${CLUSTER1}:${LEADER}"
image8=test8
create_image "${CLUSTER2}" "${POOL}" "${image8}"
# Remember which instance was stopped; wait_for_leader overwrites LEADER.
prev_leader=${LEADER}
wait_for_leader
test_replay "${image1}" "${image8}"

testlog "TEST: start previous leader and test replay"
start_mirror "${CLUSTER1}:${prev_leader}"
image9=test9
create_image "${CLUSTER2}" "${POOL}" "${image9}"
test_replay "${image1}" "${image9}"

testlog "TEST: crash leader and test replay"
stop_mirror "${CLUSTER1}:${LEADER}" -KILL
image10=test10
create_image "${CLUSTER2}" "${POOL}" "${image10}"
prev_leader=${LEADER}
wait_for_leader
test_replay "${image1}" "${image10}"

testlog "TEST: start previous leader and test replay"
start_mirror "${CLUSTER1}:${prev_leader}"
image11=test11
create_image "${CLUSTER2}" "${POOL}" "${image11}"
test_replay "${image1}" "${image11}"

testlog "TEST: start some more daemon instances and test replay"
start_mirror "${CLUSTER1}:3"
start_mirror "${CLUSTER1}:4"
start_mirror "${CLUSTER1}:5"
start_mirror "${CLUSTER1}:6"
image13=test13
create_image "${CLUSTER2}" "${POOL}" "${image13}"
# test_replay takes image names only; the leader is read from LEADER, so no
# leader argument is passed here.
test_replay "${image1}" "${image13}"

testlog "TEST: release leader and test it is acquired by secondary"
release_leader
wait_for_leader_released
wait_for_leader
test_replay "${image1}" "${image2}"

testlog "TEST: in loop: stop leader and test replay"
# Six iterations: each one stops the current leader and waits for another
# instance to take over.
for i in 0 1 2 3 4 5; do
    stop_mirror "${CLUSTER1}:${LEADER}"
    wait_for_leader
    test_replay "${image1}"
done

stop_mirror "${CLUSTER1}:${LEADER}"

echo OK