src/ceph/qa/tasks/cephfs/test_damage.py
import json
import logging
import errno
import re
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in the damage table (never what we expect)
EIO_NO_DAMAGE = "eio without damage entry"


log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")

    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology  # 459s
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to loss of any single metadata object
        """

        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.rados(['export', '/tmp/metadata.bin'])
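        # This export is our pristine snapshot of the metadata pool: each
        # mutation below is applied to a fresh copy restored with
        # 'rados import', so damage never accumulates across iterations.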

        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match("stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to? i.e.
            what should I poke to see EIO after damaging it?
            """

            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in hope of seeing some errors
            return None

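        # Metadata pool object names follow "<inode number in hex>.<frag id>":
        # 1.00000000 is the root dirfrag, 100.00000000 is rank 0's ~mds0
        # directory, and 60X.00000000 are rank 0's stray dirfrags.  A dirfrag's
        # dentries are stored as omap entries keyed "<name>_head".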
        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

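        # Mutations are later used as keys into the 'results' dict, so
        # __eq__/__hash__ above key them by their (unique) description.
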
        junk = "deadbeef" * 10
        mutations = []

        # Removals
        for obj_id in objects:
            if obj_id in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000"
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                obj_id, expectation
            ))

            mutations.append(MetadataMutation(
                obj_id,
                "Delete {0}".format(obj_id),
                lambda o=obj_id: self.fs.rados(["rm", o]),
                expectation
            ))

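        # Note the "o=obj_id" default argument in each mutate_fn lambda: it
        # binds the loop variable's value at definition time, so every lambda
        # mutates its own object rather than whichever was iterated last.
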
        # Blatant corruptions
        mutations.extend([
            MetadataMutation(
                o,
                "Corrupt {0}".format(o),
                lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk),
                DAMAGED_ON_START
            ) for o in data_objects
        ])

        # Truncations
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    obj_id,
                    "Truncate {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["truncate", o, "0"]),
                    expectation
            ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # OMAP header corruptions
        for obj_id in omap_header_objs:
            if re.match(r"60.\.00000000", obj_id) \
                    or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                obj_id, expectation
            ))

            mutations.append(
                MetadataMutation(
                    obj_id,
                    "Corrupt omap header on {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]),
                    expectation
                )
            )

        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
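            # ('mds repaired' clears the damaged flag on rank 0 so the rank
            # is allowed to start again after the previous iteration.)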

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash: re-raise
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue
                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, but did not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue
                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER

            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state: we have
        # marked the dir complete so that folks can access it, but
        # in actual fact a dentry is missing.
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
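        # The dentry's omap value is now undecodable junk; when the MDS loads
        # this dirfrag it should drop the bad dentry and record an entry in
        # its damage table instead of crashing (asserted below).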

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try to read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, so
        # now when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """

        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'])
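        # ('journal reset' wipes the journal so the restarted MDS cannot
        # re-populate its cache by replaying journaled events.)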
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of dir1/file1 (used for resolving
        # testdir/hardlink1).
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
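        # (The backtrace lives in the "parent" xattr of the inode's first
        # data-pool object, so overwriting it with junk makes it undecodable.)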

        # Check that stat'ing the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))


        # Case 2: missing dirfrag for the target inode

        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that stat'ing the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
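        # 'damage ls' does not guarantee entry order, so accept the backtrace
        # and dir_frag entries in either order.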
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))