import json
import logging
import errno
import re
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in damage table (never what we expect)
EIO_NO_DAMAGE = "eio without damage entry"
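
# Each mutation constructed in test_object_deletion carries one of the
# labels above as its expected outcome, to be compared against the outcome
# actually observed when the damage is injected.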

log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")
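
    # A rank appears in the mds map's 'damaged' list once the MDS has
    # reported damage to the mons and been removed from the active set.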
    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to the loss of any
        single metadata object
        """
        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.rados(['export', '/tmp/metadata.bin'])

        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match(r"stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to?
            i.e. what should I poke to see EIO after damaging it?
            """
            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in the hope of seeing some errors
            return None

        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)
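
        # We now have three classes of target: objects with an OMAP header,
        # individual OMAP key/value pairs, and objects with data in their
        # bodies.  Each class gets its own set of mutations below.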

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

        junk = "deadbeef" * 10
        mutations = []

        # Removals
        for obj_id in objects:
            if obj_id in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000",
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                obj_id, expectation
            ))

            mutations.append(MetadataMutation(
                obj_id,
                "Delete {0}".format(obj_id),
                lambda o=obj_id: self.fs.rados(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        mutations.extend([
            MetadataMutation(
                o,
                "Corrupt {0}".format(o),
                lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk),
                DAMAGED_ON_START
            ) for o in data_objects
        ])

        # Truncations
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    obj_id,
                    "Truncate {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["truncate", o, "0"]),
                    expectation
                ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # OMAP header corruptions
        for obj_id in omap_header_objs:
            if re.match(r"60.\.00000000", obj_id) \
                    or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                obj_id, expectation
            ))

            mutations.append(
                MetadataMutation(
                    obj_id,
                    "Corrupt omap header on {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]),
                    expectation
                )
            )
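
        # The harness: for each mutation, restore the pristine metadata pool
        # image, inject the damage, restart the MDS and record which of the
        # outcome labels we actually observe.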

        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue

                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue

                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                        "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")
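
        # The count is still 2 because the dirstat backing ceph.dir.files
        # still includes the damaged dentry; the scrub repair below brings
        # it back in line with what is actually visible.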

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """
        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'])
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of alpha/target (used for resolving
        # hardlinks)
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
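
        # (The backtrace lives in the "parent" xattr of the inode's first
        # data object; it records the ancestor dentries the MDS uses to
        # resolve an inode to a path, e.g. when following a hardlink.)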

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode

        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
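        # "damage ls" does not guarantee entry ordering, so accept the
        # backtrace and dir_frag entries in either order.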
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))