"""
Test our tools for recovering metadata from the data pool into an alternate pool
"""
import logging
import traceback

from textwrap import dedent
from collections import namedtuple, defaultdict

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# A single failed validation check: the AssertionError that was caught plus the
# formatted traceback that shows which assertion fired.
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
class OverlayWorkload(object):
    """
    Base class for workloads that write files through the original filesystem
    and later validate them through the recovery filesystem, after the
    metadata pool has been rebuilt into an alternate pool.
    """
    def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
        self._orig_fs = orig_fs
        self._recovery_fs = recovery_fs
        self._orig_mount = orig_mount
        self._recovery_mount = recovery_mount
        # Populated by write(); validate() compares against it.
        self._initial_state = None

        # Accumulate backtraces for every failed validation, and return them. Backtraces
        # are rather verbose, but we only see them when something breaks, and they
        # let us see which check failed without having to decorate each check with
        # a string
        self._errors = []

    def assert_equal(self, a, b):
        # Record (rather than propagate) a failure, so that validation can
        # continue and report every mismatch at the end.
        try:
            if a != b:
                raise AssertionError("{0} != {1}".format(a, b))
        except AssertionError as e:
            self._errors.append(
                ValidationError(e, traceback.format_exc(3))
            )

    def write(self):
        """
        Write the workload files to the mount
        """
        raise NotImplementedError()

    def validate(self):
        """
        Read from the mount and validate that the workload files are present (i.e. have
        survived or been reconstructed from the test scenario)
        """
        raise NotImplementedError()

    def damage(self):
        """
        Damage the filesystem pools in ways that will be interesting to recover from. By
        default just wipe everything in the metadata pool
        """
        # Delete every object in the metadata pool
        objects = self._orig_fs.rados(["ls"]).split("\n")
        for o in objects:
            self._orig_fs.rados(["rm", o])

    def flush(self):
        """
        Called after client unmount, after write: flush whatever you want
        """
        self._orig_fs.mds_asok(["flush", "journal"])
        self._recovery_fs.mds_asok(["flush", "journal"])
class SimpleOverlayWorkload(OverlayWorkload):
    """
    Single file, single directory, check that it gets recovered and so does its size
    """
    def write(self):
        self._orig_mount.run_shell(["mkdir", "subdir"])
        self._orig_mount.write_n_mb("subdir/sixmegs", 6)
        # Remember the file's stat() so validate() can compare sizes later.
        self._initial_state = self._orig_mount.stat("subdir/sixmegs")

    def validate(self):
        self._recovery_mount.run_shell(["ls", "subdir"])
        st = self._recovery_mount.stat("subdir/sixmegs")
        self.assert_equal(st['st_size'], self._initial_state['st_size'])
        # Callers inspect the returned error list; empty means success.
        return self._errors
class TestRecoveryPool(CephFSTestCase):
    """
    Check that after destroying the metadata pool we can rebuild its contents
    into an alternate (recovery) pool with cephfs-data-scan and friends, and
    that a client can then see and read the recovered files.
    """
    # NOTE(review): two daemons/clients because the test drives both the
    # original and the recovery filesystem at the same time — confirm values
    # against the upstream test.
    MDSS_REQUIRED = 2
    CLIENTS_REQUIRED = 2
    REQUIRE_RECOVERY_FILESYSTEM = True

    def is_marked_damaged(self, rank):
        # True if `rank` appears in the MDS map's list of damaged ranks.
        mds_map = self.fs.get_mds_map()
        return rank in mds_map['damaged']

    def _rebuild_metadata(self, workload, other_pool=None, workers=1):
        """
        That when all objects in metadata pool are removed, we can rebuild a metadata pool
        based on the contents of a data pool, and a client can see and read our files.

        :param workload: OverlayWorkload instance used to write, damage and validate
        :param other_pool: unused here; kept for signature compatibility
        :param workers: unused here; kept for signature compatibility
        """

        # First, inject some files
        workload.write()

        # Unmount the client and flush the journal: the tool should also cope with
        # situations where there is dirty metadata, but we'll test that separately
        self.mount_a.umount_wait()
        self.mount_b.umount_wait()
        workload.flush()

        # Create the alternate pool if requested
        recovery_fs = self.recovery_fs.name
        recovery_pool = self.recovery_fs.get_metadata_pool_name()
        self.recovery_fs.data_scan(['init', '--force-init',
                                    '--filesystem', recovery_fs,
                                    '--alternate-pool', recovery_pool])
        self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
        self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])

        # Stop the MDS before operating on the pools offline
        self.fs.mds_stop()
        self.fs.mds_fail()

        # After recovery, we need the MDS to not be strict about stats (in production these options
        # are off by default, but in QA we need to explicitly disable them)
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)

        # Apply any data damage the workload wants
        workload.damage()

        # Reset the MDS map in case multiple ranks were in play: recovery procedure
        # only understands how to rebuild metadata under rank 0
        self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
                                            '--yes-i-really-mean-it')

        def get_state(mds_id):
            # State string for an MDS daemon, or None if it is absent from the
            # map.  NOTE(review): currently unused in this method.
            info = self.mds_cluster.get_mds_info(mds_id)
            return info['state'] if info is not None else None

        self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
        self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])

        # Run the recovery procedure
        # NOTE(review): this sanity check is deliberately disabled (`if False`);
        # confirm intent before re-enabling.
        if False:
            with self.assertRaises(CommandFailedError):
                # Normal reset should fail when no objects are present, we'll use --force instead
                self.fs.journal_tool(["journal", "reset"])

        self.fs.mds_stop()
        self.fs.data_scan(['scan_extents', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           self.fs.get_data_pool_name()])
        self.fs.data_scan(['scan_inodes', '--alternate-pool',
                           recovery_pool, '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event',
                              'recover_dentries', 'list',
                              '--alternate-pool', recovery_pool])

        self.fs.data_scan(['init', '--force-init', '--filesystem',
                           self.fs.name])
        self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
                           '--force-corrupt', '--force-init',
                           self.fs.get_data_pool_name()])
        self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'event',
                              'recover_dentries', 'list'])

        # Reset both journals with --force (a plain reset fails when the
        # journal objects are gone)
        self.fs.journal_tool(['--rank=' + recovery_fs + ":0", 'journal',
                              'reset', '--force'])
        self.fs.journal_tool(['--rank=' + self.fs.name + ":0", 'journal',
                              'reset', '--force'])
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
                                            recovery_fs + ":0")

        # Mark the MDS repaired
        self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')

        # Start the MDS
        self.fs.mds_restart()
        self.recovery_fs.mds_restart()
        self.fs.wait_for_daemons()
        self.recovery_fs.wait_for_daemons()
        # Crank up MDS debugging and scrub the recovered tree to repair stats
        for mds_id in self.recovery_fs.mds_ids:
            self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + mds_id,
                                                'injectargs', '--debug-mds=20')
            self.fs.mon_manager.raw_cluster_cmd('daemon', "mds." + mds_id,
                                                'scrub_path', '/',
                                                'recursive', 'repair')
        log.info(str(self.mds_cluster.status()))

        # Mount a client against each filesystem
        self.mount_a.mount()
        self.mount_b.mount(mount_fs_name=recovery_fs)
        self.mount_a.wait_until_mounted()
        self.mount_b.wait_until_mounted()

        # See that the files are present and correct
        errors = workload.validate()
        if errors:
            log.error("Validation errors found: {0}".format(len(errors)))
            for e in errors:
                log.error(e.exception)
                log.error(e.backtrace)
            raise AssertionError("Validation failed, first error: {0}\n{1}".format(
                errors[0].exception, errors[0].backtrace
            ))

    def test_rebuild_simple(self):
        self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
                                                     self.mount_a, self.mount_b))