"""
Test our tools for recovering metadata from the data pool into an alternate pool
"""
import logging
import traceback

from textwrap import dedent
from collections import namedtuple, defaultdict

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# A single failed validation check: the AssertionError that was caught plus the
# formatted traceback that shows which assertion fired.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
class OverlayWorkload(object):
    """
    Base class for workloads that write files through the original filesystem
    and later validate them through the recovery filesystem, after the
    metadata pool has been rebuilt into an alternate pool.
    """
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        # Populated by write(); validate() compares against it.
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def assert_equal(self, a, b):
        # Record (rather than propagate) a failure, so that validation can
        # continue and report every mismatch at the end.
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])
class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        # Remember the file's stat() so validate() can compare sizes later.
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        # Callers inspect the returned error list; empty means success.
        return self._errors
class TestRecoveryPool(CephFSTestCase):
    """
    Check that after destroying the metadata pool we can rebuild its contents
    into an alternate (recovery) pool with cephfs-data-scan and friends, and
    that a client can then see and read the recovered files.
    """
    # NOTE(review): two daemons/clients because the test drives both the
    # original and the recovery filesystem at the same time — confirm values
    # against the upstream test.
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        # True if `rank` appears in the MDS map's list of damaged ranks.
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.

        :param workload: OverlayWorkload instance used to write, damage and validate
        :param other_pool: unused here; kept for signature compatibility
        :param workers: unused here; kept for signature compatibility
        """

        # First, inject some files
        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS before operating on the pools offline
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        def get_state(mds_id):
            # State string for an MDS daemon, or None if it is absent from the
            # map.  NOTE(review): currently unused in this method.
            info = self.mds_cluster.get_mds_info(mds_id)
            return info['state'] if info is not None else None

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        # NOTE(review): this sanity check is deliberately disabled (`if False`);
        # confirm intent before re-enabling.
        if False:
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present, we'll use --force instead
                self.fs.journal_tool(["journal", "reset"])

        self.fs.mds_stop()
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event',
                              'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool])

        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event',
                              'recover_dentries', 'list'])

        # Reset both journals with --force (a plain reset fails when the
        # journal objects are gone)
        self.fs.journal_tool(['--rank=' + recovery_fs + ":0", 'journal',
                              'reset', '--force'])
        self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'journal',
                              'reset', '--force'])
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
        # Crank up MDS debugging and scrub the recovered tree to repair stats
        for mds_id in self.recovery_fs.mds_ids:
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + mds_id,
                                                'injectargs', '--debug-mds=20')
            self.fs.mon_manager.raw_cluster_cmd('daemon', "mds." + mds_id,
                                                'scrub_path', '/',
                                                'recursive', 'repair')
        log.info(str(self.mds_cluster.status()))

        # Mount a client against each filesystem
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))