import json
import logging
import errno
import re
from teuthology.contextutil import MaxWhileTries
from teuthology.exceptions import CommandFailedError
from teuthology.orchestra.run import wait
from tasks.cephfs.fuse_mount import FuseMount
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology

DAMAGED_ON_START = "damaged_on_start"
DAMAGED_ON_LS = "damaged_on_ls"
CRASHED = "server crashed"
NO_DAMAGE = "no damage"
FAILED_CLIENT = "client failed"
FAILED_SERVER = "server failed"

# An EIO in response to a stat from the client
EIO_ON_LS = "eio"

# An EIO, but nothing in damage table (never what we expect)
EIO_NO_DAMAGE = "eio without damage entry"
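
# Each mutation constructed in test_object_deletion carries one of the
# labels above as its expected outcome, to be compared against the outcome
# actually observed when the damage is injected.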

log = logging.getLogger(__name__)


class TestDamage(CephFSTestCase):
    def _simple_workload_write(self):
        self.mount_a.run_shell(["mkdir", "subdir"])
        self.mount_a.write_n_mb("subdir/sixmegs", 6)
        return self.mount_a.stat("subdir/sixmegs")
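
    # A rank appears in the mds map's 'damaged' list once the MDS has
    # reported damage to the mons and been removed from the active set.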
    def is_marked_damaged(self, rank):
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    @for_teuthology
    def test_object_deletion(self):
        """
        That the MDS has a clean 'damaged' response to the loss of any
        single metadata object
        """
        self._simple_workload_write()

        # Hmm, actually it would be nice to permute whether the metadata pool
        # state contains sessions or not, but for the moment close this session
        # to avoid waiting through reconnect on every MDS start.
        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        self.fs.rados(['export', '/tmp/metadata.bin'])

        def is_ignored(obj_id, dentry=None):
            """
            A filter to avoid redundantly mutating many similar objects (e.g.
            stray dirfrags) or similar dentries (e.g. stray dir dentries)
            """
            if re.match(r"60.\.00000000", obj_id) and obj_id != "600.00000000":
                return True

            if dentry and obj_id == "100.00000000":
                if re.match(r"stray.+_head", dentry) and dentry != "stray0_head":
                    return True

            return False

        def get_path(obj_id, dentry=None):
            """
            What filesystem path does this object or dentry correspond to?
            i.e. what should I poke to see EIO after damaging it?
            """
            if obj_id == "1.00000000" and dentry == "subdir_head":
                return "./subdir"
            elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
                return "./subdir/sixmegs"

            # None means ls will do an "ls -R" in the hope of seeing some errors
            return None

        objects = self.fs.rados(["ls"]).split("\n")
        objects = [o for o in objects if not is_ignored(o)]

        # Find all objects with an OMAP header
        omap_header_objs = []
        for o in objects:
            header = self.fs.rados(["getomapheader", o])
            # The rados CLI wraps the header output in a hex-printed style
            header_bytes = int(re.match(r"header \((.+) bytes\)", header).group(1))
            if header_bytes > 0:
                omap_header_objs.append(o)

        # Find all OMAP key/vals
        omap_keys = []
        for o in objects:
            keys_str = self.fs.rados(["listomapkeys", o])
            if keys_str:
                for key in keys_str.split("\n"):
                    if not is_ignored(o, key):
                        omap_keys.append((o, key))

        # Find objects that have data in their bodies
        data_objects = []
        for obj_id in objects:
            stat_out = self.fs.rados(["stat", obj_id])
            size = int(re.match(".+, size (.+)$", stat_out).group(1))
            if size > 0:
                data_objects.append(obj_id)
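
        # We now have three classes of target: objects with an OMAP header,
        # individual OMAP key/value pairs, and objects with data in their
        # bodies.  Each class gets its own set of mutations below.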

        # Define the various forms of damage we will inflict
        class MetadataMutation(object):
            def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
                self.obj_id = obj_id_
                self.desc = desc_
                self.mutate_fn = mutate_fn_
                self.expectation = expectation_
                if ls_path is None:
                    self.ls_path = "."
                else:
                    self.ls_path = ls_path

            def __eq__(self, other):
                return self.desc == other.desc

            def __hash__(self):
                return hash(self.desc)

        junk = "deadbeef" * 10
        mutations = []

        # Removals
        for obj_id in objects:
            if obj_id in [
                # JournalPointers are auto-replaced if missing (same path as upgrade)
                "400.00000000",
                # Missing dirfrags for non-system dirs result in empty directory
                "10000000000.00000000",
                # PurgeQueue is auto-created if not found on startup
                "500.00000000",
            ]:
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            log.info("Expectation on rm '{0}' will be '{1}'".format(
                obj_id, expectation
            ))

            mutations.append(MetadataMutation(
                obj_id,
                "Delete {0}".format(obj_id),
                lambda o=obj_id: self.fs.rados(["rm", o]),
                expectation
            ))

        # Blatant corruptions
        mutations.extend([
            MetadataMutation(
                o,
                "Corrupt {0}".format(o),
                lambda o=o: self.fs.rados(["put", o, "-"], stdin_data=junk),
                DAMAGED_ON_START
            ) for o in data_objects
        ])

        # Truncations
        for obj_id in data_objects:
            if obj_id == "500.00000000":
                # The PurgeQueue is allowed to be empty: Journaler interprets
                # an empty header object as an empty journal.
                expectation = NO_DAMAGE
            else:
                expectation = DAMAGED_ON_START

            mutations.append(
                MetadataMutation(
                    obj_id,
                    "Truncate {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["truncate", o, "0"]),
                    expectation
                ))

        # OMAP value corruptions
        for o, k in omap_keys:
            if o.startswith("100."):
                # Anything in rank 0's 'mydir'
                expectation = DAMAGED_ON_START
            else:
                expectation = EIO_ON_LS

            mutations.append(
                MetadataMutation(
                    o,
                    "Corrupt omap key {0}:{1}".format(o, k),
                    lambda o=o, k=k: self.fs.rados(["setomapval", o, k, junk]),
                    expectation,
                    get_path(o, k)
                )
            )

        # OMAP header corruptions
        for obj_id in omap_header_objs:
            if re.match(r"60.\.00000000", obj_id) \
                    or obj_id in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
                expectation = DAMAGED_ON_START
            else:
                expectation = NO_DAMAGE

            log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
                obj_id, expectation
            ))

            mutations.append(
                MetadataMutation(
                    obj_id,
                    "Corrupt omap header on {0}".format(obj_id),
                    lambda o=obj_id: self.fs.rados(["setomapheader", o, junk]),
                    expectation
                )
            )
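
        # The harness: for each mutation, restore the pristine metadata pool
        # image, inject the damage, restart the MDS and record which of the
        # outcome labels we actually observe.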

        results = {}

        for mutation in mutations:
            log.info("Applying mutation '{0}'".format(mutation.desc))

            # Reset MDS state
            self.mount_a.umount_wait(force=True)
            self.fs.mds_stop()
            self.fs.mds_fail()
            self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

            # Reset RADOS pool state
            self.fs.rados(['import', '/tmp/metadata.bin'])

            # Inject the mutation
            mutation.mutate_fn()

            # Try starting the MDS
            self.fs.mds_restart()

            # How long we'll wait between starting a daemon and expecting
            # it to make it through startup, and potentially declare itself
            # damaged to the mon cluster.
            startup_timeout = 60

            if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
                if mutation.expectation == DAMAGED_ON_START:
                    # The MDS may pass through active before making it to damaged
                    try:
                        self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
                    except RuntimeError:
                        pass

                # Wait for MDS to either come up or go into damaged state
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
                except RuntimeError:
                    crashed = False
                    # Didn't make it to healthy or damaged, did it crash?
                    for daemon_id, daemon in self.fs.mds_daemons.items():
                        if daemon.proc and daemon.proc.finished:
                            crashed = True
                            log.error("Daemon {0} crashed!".format(daemon_id))
                            daemon.proc = None  # So that subsequent stop() doesn't raise error
                    if not crashed:
                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
                        raise
                    else:
                        log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
                        results[mutation] = CRASHED
                        continue

                if self.is_marked_damaged(0):
                    log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_START
                    continue
                else:
                    log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
            else:
                try:
                    self.wait_until_true(self.fs.are_daemons_healthy, 60)
                except RuntimeError:
                    log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
                    if self.is_marked_damaged(0):
                        results[mutation] = DAMAGED_ON_START
                    else:
                        results[mutation] = FAILED_SERVER
                    continue

                log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))

            # MDS is up, should go damaged on ls or client mount
            self.mount_a.mount()
            self.mount_a.wait_until_mounted()
            if mutation.ls_path == ".":
                proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
            else:
                proc = self.mount_a.stat(mutation.ls_path, wait=False)

            if mutation.expectation == DAMAGED_ON_LS:
                try:
                    self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
                    log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
                    results[mutation] = DAMAGED_ON_LS
                except RuntimeError:
                    if self.fs.are_daemons_healthy():
                        log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
                            mutation.desc))
                        results[mutation] = NO_DAMAGE
                    else:
                        log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
                        results[mutation] = FAILED_SERVER
            else:
                try:
                    wait([proc], 20)
                    log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc))
                    results[mutation] = NO_DAMAGE
                except MaxWhileTries:
                    log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
                    results[mutation] = FAILED_CLIENT
                except CommandFailedError as e:
                    if e.exitstatus == errno.EIO:
                        log.info("Result: EIO on client")
                        results[mutation] = EIO_ON_LS
                    else:
                        log.info("Result: unexpected error {0} on client".format(e))
                        results[mutation] = FAILED_CLIENT

            if mutation.expectation == EIO_ON_LS:
                # EIOs mean something handled by DamageTable: assert that it has
                # been populated
                damage = json.loads(
                    self.fs.mon_manager.raw_cluster_cmd(
                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                        "damage", "ls", '--format=json-pretty'))
                if len(damage) == 0:
                    results[mutation] = EIO_NO_DAMAGE

        failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
        if failures:
            log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
            for mutation, result in failures:
                log.error("  Expected '{0}' actually '{1}' from '{2}'".format(
                    mutation.expectation, result, mutation.desc
                ))
            raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
        else:
            log.info("All {0} mutations had expected outcomes".format(len(mutations)))

    def test_damaged_dentry(self):
        # Damage to dentries is interesting because it leaves the
        # directory's `complete` flag in a subtle state where
        # we have marked the dir complete in order that folks
        # can access it, but in actual fact there is a dentry
        # missing
        self.mount_a.run_shell(["mkdir", "subdir/"])

        self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
        self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])

        subdir_ino = self.mount_a.path_to_ino("subdir")

        self.mount_a.umount_wait()
        for mds_name in self.fs.get_active_names():
            self.fs.mds_asok(["flush", "journal"], mds_name)

        self.fs.mds_stop()
        self.fs.mds_fail()

        # Corrupt a dentry
        junk = "deadbeef" * 10
        dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Start up and try to list it
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        dentries = self.mount_a.ls("subdir/")

        # The damaged guy should have disappeared
        self.assertEqual(dentries, ["file_undamaged"])

        # I should get ENOENT if I try and read it normally, because
        # the dir is considered complete
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected ENOENT")

        # The fact that there is damage should have been recorded
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id = damage[0]['id']

        # If I try to create a dentry with the same name as the damaged guy
        # then that should be forbidden
        try:
            self.mount_a.touch("subdir/file_to_be_damaged")
        except CommandFailedError as e:
            self.assertEqual(e.exitstatus, errno.EIO)
        else:
            raise AssertionError("Expected EIO")

        # Attempting that touch will clear the client's complete flag, now
        # when I stat it I'll get EIO instead of ENOENT
        try:
            self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
        except CommandFailedError as e:
            if isinstance(self.mount_a, FuseMount):
                self.assertEqual(e.exitstatus, errno.EIO)
            else:
                # Kernel client handles this case differently
                self.assertEqual(e.exitstatus, errno.ENOENT)
        else:
            raise AssertionError("Expected EIO")

        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "2")
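
        # The count is still 2 because the dirstat backing ceph.dir.files
        # still includes the damaged dentry; the scrub repair below brings
        # it back in line with what is actually visible.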

        self.mount_a.umount_wait()

        # Now repair the stats
        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
        log.info(json.dumps(scrub_json, indent=2))

        self.assertEqual(scrub_json["passed_validation"], False)
        self.assertEqual(scrub_json["raw_stats"]["checked"], True)
        self.assertEqual(scrub_json["raw_stats"]["passed"], False)

        # Check that the file count is now correct
        self.mount_a.mount()
        self.mount_a.wait_until_mounted()
        nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
        self.assertEqual(nfiles, "1")

        # Clean up the omap object
        self.fs.rados(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])

        # Clean up the damagetable entry
        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", "{did}".format(did=damage_id))

        # Now I should be able to create a file with the same name as the
        # damaged guy if I want.
        self.mount_a.touch("subdir/file_to_be_damaged")

    def test_open_ino_errors(self):
        """
        That errors encountered during opening inos are properly propagated
        """
        self.mount_a.run_shell(["mkdir", "dir1"])
        self.mount_a.run_shell(["touch", "dir1/file1"])
        self.mount_a.run_shell(["mkdir", "dir2"])
        self.mount_a.run_shell(["touch", "dir2/file2"])
        self.mount_a.run_shell(["mkdir", "testdir"])
        self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
        self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])

        file1_ino = self.mount_a.path_to_ino("dir1/file1")
        file2_ino = self.mount_a.path_to_ino("dir2/file2")
        dir2_ino = self.mount_a.path_to_ino("dir2")

        # Ensure everything is written to backing store
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Drop everything from the MDS cache
        self.mds_cluster.mds_stop()
        self.fs.journal_tool(['journal', 'reset'])
        self.mds_cluster.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # Case 1: un-decodeable backtrace

        # Validate that the backtrace is present and decodable
        self.fs.read_backtrace(file1_ino)
        # Go corrupt the backtrace of alpha/target (used for resolving
        # hardlinks)
        self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
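
        # (The backtrace lives in the "parent" xattr of the inode's first
        # data object; it records the ancestor dentries the MDS uses to
        # resolve an inode to a path, e.g. when following a hardlink.)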

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        self.assertEqual(damage[0]['damage_type'], "backtrace")
        self.assertEqual(damage[0]['ino'], file1_ino)

        self.fs.mon_manager.raw_cluster_cmd(
            'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
            "damage", "rm", str(damage[0]['id']))

        # Case 2: missing dirfrag for the target inode

        self.fs.rados(["rm", "{0:x}.00000000".format(dir2_ino)])

        # Check that touching the hardlink gives EIO
        ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
        try:
            ran.wait()
        except CommandFailedError:
            self.assertTrue("Input/output error" in ran.stderr.getvalue())

        # Check that an entry is created in the damage table
        damage = json.loads(
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 2)
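        # "damage ls" does not guarantee entry ordering, so accept the
        # backtrace and dir_frag entries in either order.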
        if damage[0]['damage_type'] == "backtrace":
            self.assertEqual(damage[0]['ino'], file2_ino)
            self.assertEqual(damage[1]['damage_type'], "dir_frag")
            self.assertEqual(damage[1]['ino'], dir2_ino)
        else:
            self.assertEqual(damage[0]['damage_type'], "dir_frag")
            self.assertEqual(damage[0]['ino'], dir2_ino)
            self.assertEqual(damage[1]['damage_type'], "backtrace")
            self.assertEqual(damage[1]['ino'], file2_ino)

        for entry in damage:
            self.fs.mon_manager.raw_cluster_cmd(
                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
                "damage", "rm", str(entry['id']))