
"""
Teuthology task for exercising CephFS client recovery
"""

import logging
from textwrap import dedent
import time
import distutils.version as version
import re
import os

from teuthology.orchestra.run import CommandFailedError, ConnectionLostError
from tasks.cephfs.cephfs_test_case import CephFSTestCase
from teuthology.packaging import get_package_version


log = logging.getLogger(__name__)


# Arbitrary timeouts for operations involving restarting
# an MDS or waiting for it to come up
MDS_RESTART_GRACE = 60


class TestClientNetworkRecovery(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    REQUIRE_ONE_CLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

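    # The settings named in LOAD_SETTINGS are read from the running MDS's
    # configuration by the CephFSTestCase setup and stored on the matching
    # class attributes below.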
    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_network_death(self):
        """
        Simulate software freeze or temporary network failure.

        Check that the client blocks I/O during failure, and completes
        I/O after failure.
        """

        # We only need one client
        self.mount_b.umount_wait()

        # Initially our one client session should be visible
        client_id = self.mount_a.get_global_id()
        ls_data = self._session_list()
        self.assert_session_count(1, ls_data)
        self.assertEqual(ls_data[0]['id'], client_id)
        self.assert_session_state(client_id, "open")

        # ...and capable of doing I/O without blocking
        self.mount_a.create_files()

        # ...but if we turn off the network
        self.fs.set_clients_block(True)

        # ...and try to start an I/O
        write_blocked = self.mount_a.write_background()

        # ...then it should block
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "open")
        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
        self.assertFalse(write_blocked.finished)
        self.assert_session_state(client_id, "stale")

        # ...until we re-enable I/O
        self.fs.set_clients_block(False)

        # ...when it should complete promptly
        a = time.time()
        self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
        write_blocked.wait()  # Already know we're finished, wait() to raise exception on errors
        recovery_time = time.time() - a
        log.info("recovery time: {0}".format(recovery_time))
        self.assert_session_state(client_id, "open")


class TestClientRecovery(CephFSTestCase):
    REQUIRE_KCLIENT_REMOTE = True
    CLIENTS_REQUIRED = 2

    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]

    # Environment references
    mds_session_timeout = None
    mds_reconnect_timeout = None
    ms_max_backoff = None

    def test_basic(self):
        # Check that two clients come up healthy and see each others' files
        # =====================================================
        self.mount_a.create_files()
        self.mount_a.check_files()
        self.mount_a.umount_wait()

        self.mount_b.check_files()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Check that the admin socket interface is correctly reporting
        # two sessions
        # =====================================================
        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        self.assertSetEqual(
            set([l['id'] for l in ls_data]),
            {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
        )

    def test_restart(self):
        # Check that after an MDS restart both clients reconnect and continue
        # to handle I/O
        # =====================================================
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_a.create_destroy()
        self.mount_b.create_destroy()

    def _session_num_caps(self, client_id):
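        # Look up this client's entry in the MDS 'session ls' output and
        # return the number of capabilities it currently holds.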
        ls_data = self.fs.mds_asok(['session', 'ls'])
        return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])

    def test_reconnect_timeout(self):
        # Reconnect timeout
        # =================
        # Check that if I stop an MDS and a client goes away, the MDS waits
        # for the reconnect period
        self.fs.mds_stop()
        self.fs.mds_fail()

        mount_a_client_id = self.mount_a.get_global_id()
        self.mount_a.umount_wait(force=True)

        self.fs.mds_restart()

        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        # Check that the MDS locally reports its state correctly
        status = self.fs.mds_asok(['status'])
        self.assertIn("reconnect_status", status)

        ls_data = self._session_list()
        self.assert_session_count(2, ls_data)

        # The session for the dead client should have the 'reconnect' flag set
        self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])

        # Wait for the reconnect state to clear; this should take the
        # reconnect timeout period.
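        # wait_for_state returns the number of seconds it spent waiting,
        # which we use to measure how long the MDS stayed in reconnect.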
        in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
        # Check that the period we waited to enter active is within a factor
        # of two of the reconnect timeout.
        self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout / 2,
                           "Should have been in reconnect phase for about {0}s, but only spent {1}s".format(
                               self.mds_reconnect_timeout, in_reconnect_for
                           ))

        self.assert_session_count(1)

        # Check that the client that timed out during reconnect can
        # mount again and do I/O
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

        self.assert_session_count(2)

    def test_reconnect_eviction(self):
        # Eviction during reconnect
        # =========================
        mount_a_client_id = self.mount_a.get_global_id()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # The mount goes away while the MDS is offline
        self.mount_a.kill()

        self.fs.mds_restart()

        # Enter reconnect phase
        self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
        self.assert_session_count(2)

        # Evict the stuck client
        self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
        self.assert_session_count(1)

        # Observe that we proceed to the active phase without waiting for the full reconnect timeout
        evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
        # Once we evict the troublemaker, the reconnect phase should complete
        # in well under the reconnect timeout.
        self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
                        "reconnect did not complete soon enough after eviction, took {0}".format(
                            evict_til_active
                        ))

        # We killed the mount earlier, so we must clean up before using it again
        self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        self.mount_a.create_destroy()

    def test_stale_caps(self):
        # Capability release from stale session
        # =====================================
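        # open_background starts a background process on mount_a that opens a
        # file and keeps it open, so mount_a carries on holding capabilities
        # for it until we intervene.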
        cap_holder = self.mount_a.open_background()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # Now, after mds_session_timeout seconds, the waiter should
            # complete its operation when the MDS marks the holder's
            # session stale.
            cap_waiter = self.mount_b.write_background()
            a = time.time()
            cap_waiter.wait()
            b = time.time()

            # Should have succeeded
            self.assertEqual(cap_waiter.exitstatus, 0)

            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
                            "Capability handover took {0}, expected approx {1}".format(
                                cap_waited, self.mds_session_timeout
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_evicted_caps(self):
        # Eviction while holding a capability
        # ===================================

        # Take out a write capability on a file on client A,
        # and then immediately kill it.
        cap_holder = self.mount_a.open_background()
        mount_a_client_id = self.mount_a.get_global_id()

        # Wait for the file to be visible from another client, indicating
        # that mount_a has completed its network ops
        self.mount_b.wait_for_visible()

        # Simulate client death
        self.mount_a.kill()

        try:
            # The waiter should get stuck waiting for the capability
            # held on the MDS by the now-dead client A
            cap_waiter = self.mount_b.write_background()
            time.sleep(5)
            self.assertFalse(cap_waiter.finished)

            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
            # Now, because I evicted the old holder of the capability, it should
            # immediately get handed over to the waiter
            a = time.time()
            cap_waiter.wait()
            b = time.time()
            cap_waited = b - a
            log.info("cap_waiter waited {0}s".format(cap_waited))
            # This is the check that it happened 'now' rather than waiting
            # for the session timeout
            self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
                            "Capability handover took {0}, expected less than {1}".format(
                                cap_waited, self.mds_session_timeout / 2.0
                            ))

            cap_holder.stdin.close()
            try:
                cap_holder.wait()
            except (CommandFailedError, ConnectionLostError):
                # We killed it (and possibly its node), so it raises an error
                pass
        finally:
            self.mount_a.kill_cleanup()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

    def test_trim_caps(self):
        # Trim capabilities when the client reconnects to the MDS
        # =======================================================

        count = 500
        # Create lots of files
        for i in range(count):
            self.mount_a.run_shell(["touch", "f{0}".format(i)])

        # Populate mount_b's cache
        self.mount_b.run_shell(["ls", "-l"])

        client_id = self.mount_b.get_global_id()
        num_caps = self._session_num_caps(client_id)
        self.assertGreaterEqual(num_caps, count)

        # Restart the MDS. The client should trim its cache when reconnecting to the MDS
        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        num_caps = self._session_num_caps(client_id)
        self.assertLess(num_caps, count,
                        "should have fewer than {0} capabilities, have {1}".format(
                            count, num_caps
                        ))

    def _is_flockable(self):
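        # Only report flock as testable when both client machines have a
        # fuse package of at least version 2.9, which is what the version
        # comparison below enforces.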
        a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
        b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
        flock_version_str = "2.9"

        version_regex = re.compile(r"[0-9\.]+")
        a_result = version_regex.match(a_version_str)
        self.assertTrue(a_result)
        b_result = version_regex.match(b_version_str)
        self.assertTrue(b_result)
        a_version = version.StrictVersion(a_result.group())
        b_version = version.StrictVersion(b_result.group())
        flock_version = version.StrictVersion(flock_version_str)

        if a_version >= flock_version and b_version >= flock_version:
            log.info("flock locks are available")
            return True
        else:
            log.info("not testing flock locks, machines have versions {av} and {bv}".format(
                av=a_version_str, bv=b_version_str))
            return False

    def test_filelock(self):
        """
        Check that file lock doesn't get lost after an MDS restart
        """

        flockable = self._is_flockable()
        lock_holder = self.mount_a.lock_background(do_flock=flockable)

        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock(do_flock=flockable)

        self.fs.mds_fail_restart()
        self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)

        self.mount_b.check_filelock(do_flock=flockable)

        # Tear down the background process
        lock_holder.stdin.close()
        try:
            lock_holder.wait()
        except (CommandFailedError, ConnectionLostError):
            # We killed it, so it raises an error
            pass

    def test_filelock_eviction(self):
        """
        Check that a file lock held by an evicted client is given to
        the waiting client.
        """
        if not self._is_flockable():
            self.skipTest("flock is not available")

        lock_holder = self.mount_a.lock_background()
        self.mount_b.wait_for_visible("background_file-2")
        self.mount_b.check_filelock()

        lock_taker = self.mount_b.lock_and_release()
        # Check the taker is waiting (doesn't get it immediately)
        time.sleep(2)
        self.assertFalse(lock_holder.finished)
        self.assertFalse(lock_taker.finished)

        try:
            mount_a_client_id = self.mount_a.get_global_id()
            self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])

            # Evicting mount_a should let mount_b's attempt to take the lock
            # succeed
            self.wait_until_true(lambda: lock_taker.finished, timeout=10)
        finally:
            # teardown() doesn't quite handle this case cleanly, so help it out
            self.mount_a.kill()
            self.mount_a.kill_cleanup()

        # Bring the client back
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

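    # The two tests below drive _test_fsync() with dirfsync=True (fsync the
    # directory fd) and dirfsync=False (fsync the new file's fd) respectively.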
    def test_dir_fsync(self):
        self._test_fsync(True)

    def test_create_fsync(self):
        self._test_fsync(False)

    def _test_fsync(self, dirfsync):
        """
        That calls to fsync guarantee visibility of metadata to another
        client immediately after the fsyncing client dies.
        """

        # Leave this guy out until he's needed
        self.mount_b.umount_wait()

        # Create dir + child dentry on client A, and fsync the dir
        path = os.path.join(self.mount_a.mountpoint, "subdir")
        self.mount_a.run_python(
            dedent("""
                import os
                import time

                path = "{path}"

                print("Starting creation...")
                start = time.time()

                os.mkdir(path)
                dfd = os.open(path, os.O_DIRECTORY)

                fd = open(os.path.join(path, "childfile"), "w")
                print("Finished creation in {{0}}s".format(time.time() - start))

                print("Starting fsync...")
                start = time.time()
                if {dirfsync}:
                    os.fsync(dfd)
                else:
                    os.fsync(fd)
                print("Finished fsync in {{0}}s".format(time.time() - start))
            """.format(path=path, dirfsync=str(dirfsync)))
        )

        # Immediately kill the MDS and then client A
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.mount_a.kill()
        self.mount_a.kill_cleanup()

        # Restart the MDS.  Wait for it to come up; it'll have to time out in clientreplay
        self.fs.mds_restart()
        log.info("Waiting for reconnect...")
        self.fs.wait_for_state("up:reconnect")
        log.info("Waiting for active...")
        self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
        log.info("Reached active...")

        # Is the child dentry visible from mount B?
        self.mount_b.mount()
        self.mount_b.wait_until_mounted()
        self.mount_b.run_shell(["ls", "subdir/childfile"])