"""
Test that the forward scrub functionality can traverse metadata and apply
requested tags, on well-formed metadata.

This is *not* the real testing for forward scrub, which will need to test
how the functionality responds to damaged metadata.

"""
import json
import logging
import struct
from collections import namedtuple
from textwrap import dedent

from teuthology.orchestra.run import CommandFailedError
from tasks.cephfs.cephfs_test_case import CephFSTestCase

log = logging.getLogger(__name__)


ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])


class TestForwardScrub(CephFSTestCase):
    MDSS_REQUIRED = 1

    def _read_str_xattr(self, pool, obj, attr):
        """
        Read a ceph-encoded string from a rados xattr
        """
        output = self.fs.rados(["getxattr", obj, attr], pool=pool)
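        # The value is ceph-encoded: a 32-bit length prefix followed by the
        # raw string bytes.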
        strlen = struct.unpack('i', output[0:4])[0]
        return output[4:(4 + strlen)]

    def _get_paths_to_ino(self):
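        """
        Return a mapping from every path under the mount (as listed by
        `find`) to its inode number.
        """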
        inos = {}
        p = self.mount_a.run_shell(["find", "./"])
        paths = p.stdout.getvalue().strip().split()
        for path in paths:
            inos[path] = self.mount_a.path_to_ino(path)

        return inos

    def test_apply_tag(self):
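        """
        That a tagging forward scrub tags every file and directory under
        the requested path, and leaves entries outside that path untagged.
        """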
        self.mount_a.run_shell(["mkdir", "parentdir"])
        self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
        self.mount_a.run_shell(["touch", "rfile"])
        self.mount_a.run_shell(["touch", "parentdir/pfile"])
        self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])

        # Build a mapping of path to inode, as we will later want to check
        # objects one by one, and objects are named after their inode numbers
        inos = self._get_paths_to_ino()

        # Flush metadata: this is a friendly test of forward scrub, so we're
        # skipping the part where it's meant to cope with dirty metadata
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        tag = "mytag"

        # Execute tagging forward scrub
        self.fs.mds_asok(["tag", "path", "/parentdir", tag])
        # Wait for completion
        import time
        time.sleep(10)
        # FIXME watching clog isn't a nice mechanism for this, once we have a
        # ScrubMap we'll watch that instead

        # Check that dirs were tagged
        for dirpath in ["./parentdir", "./parentdir/childdir"]:
            self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())

        # Check that files were tagged
        for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
            self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())

        # This file wasn't under the tagged path, so it should not have been tagged
        self.assertUntagged(inos["./rfile"])

    def assertUntagged(self, ino):
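        # An inode's first RADOS object is named "<hex ino>.00000000"; the
        # scrub tag is written as an xattr on that object.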
        file_obj_name = "{0:x}.00000000".format(ino)
        with self.assertRaises(CommandFailedError):
            self._read_str_xattr(
                self.fs.get_data_pool_name(),
                file_obj_name,
                "scrub_tag"
            )

    def assertTagged(self, ino, tag, pool):
        file_obj_name = "{0:x}.00000000".format(ino)
        wrote = self._read_str_xattr(
            pool,
            file_obj_name,
            "scrub_tag"
        )
        self.assertEqual(wrote, tag)

    def _validate_linkage(self, expected):
        inos = self._get_paths_to_ino()
        try:
            self.assertDictEqual(inos, expected)
        except AssertionError:
            log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
            log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
            raise

    def test_orphan_scan(self):
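        """
        That a tagging forward scrub followed by a tag-filtered
        cephfs-data-scan recovers an orphaned inode (one whose dentry was
        deleted) without disturbing the rest of the tree.
        """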
        # Create some files whose metadata we will flush
        self.mount_a.run_python(dedent("""
            import os
            mount_point = "{mount_point}"
            parent = os.path.join(mount_point, "parent")
            os.mkdir(parent)
            flushed = os.path.join(parent, "flushed")
            os.mkdir(flushed)
            for f in ["alpha", "bravo", "charlie"]:
                with open(os.path.join(flushed, f), 'w') as fh:
                    fh.write(f)
        """.format(mount_point=self.mount_a.mountpoint)))

        inos = self._get_paths_to_ino()

        # Flush journal
        # Umount before flush to avoid cap releases putting
        # things we don't want in the journal later.
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        # Create a new inode that's just in the log, i.e. it would look
        # orphaned to a backward scan if the backward scan were not
        # respecting the scrub_tag xattr.
        self.mount_a.mount()
        self.mount_a.run_shell(["mkdir", "parent/unflushed"])
        self.mount_a.run_shell(["dd", "if=/dev/urandom",
                                "of=./parent/unflushed/jfile",
                                "bs=1M", "count=8"])
        inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
        inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
        self.mount_a.umount_wait()

        # Orphan an inode by deleting its dentry
        # Our victim will be.... bravo.
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.mds_fail()
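        # We are about to delete a dentry behind the MDS's back, which leaves
        # the parent's fragstat/rstat inconsistent, so switch off the checks
        # that would otherwise complain about that.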
        self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
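        # A dirfrag object stores one omap key per dentry ("<name>_head" for
        # the head version), so removing bravo's key deletes the dentry and
        # orphans its inode.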
        frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
        self.fs.rados(["rmomapkey", frag_obj_id, "bravo_head"])

        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # See that the orphaned file is indeed missing from a client's POV
        self.mount_a.mount()
        damaged_state = self._get_paths_to_ino()
        self.assertNotIn("./parent/flushed/bravo", damaged_state)
        self.mount_a.umount_wait()

        # Run a tagging forward scrub
        tag = "mytag123"
        self.fs.mds_asok(["tag", "path", "/parent", tag])

        # See that the orphan was not tagged
        self.assertUntagged(inos['./parent/flushed/bravo'])

        # See that the flushed-metadata-and-still-present files are tagged
        self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
        self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())

        # See that the journalled-but-not-flushed file *was* tagged
        self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())

        # Run cephfs-data-scan targeting only orphans
        self.fs.mds_stop()
        self.fs.mds_fail()
        self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
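        # --filter-tag makes scan_inodes skip objects that already carry the
        # scrub tag, so only the untagged orphan (bravo) gets injected, using
        # its backtrace to restore the original linkage.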
        self.fs.data_scan([
            "scan_inodes",
            "--filter-tag", tag,
            self.fs.get_data_pool_name()
        ])

        # After in-place injection the stats should be consistent again,
        # so re-enable the checks
        self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
        self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)

        # And we should have all the same linkage we started with,
        # and no lost+found, and no extra inodes!
        self.fs.mds_restart()
        self.fs.wait_for_daemons()
        self.mount_a.mount()
        self._validate_linkage(inos)

    def _stash_inotable(self):
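        """
        Fetch the raw InoTable object for every active MDS rank, keyed by
        object name, so that it can be written back later.
        """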
        # Get all active ranks
        ranks = self.fs.get_all_mds_rank()

        inotable_dict = {}
        for rank in ranks:
            inotable_oid = "mds{rank:d}_inotable".format(rank=rank)
            log.info("Trying to fetch inotable object: " + inotable_oid)

            inotable_raw = self.fs.get_metadata_object_raw(inotable_oid)
            inotable_dict[inotable_oid] = inotable_raw
        return inotable_dict

    def test_inotable_sync(self):
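        """
        That the MDS repairs the inode table when the on-disk copy has been
        reverted to a stale version, i.e. when inode numbers that are in use
        still appear in the free list.
        """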
        self.mount_a.write_n_mb("file1_sixmegs", 6)

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        inotable_copy = self._stash_inotable()

        self.mount_a.mount()

        self.mount_a.write_n_mb("file2_sixmegs", 6)
        self.mount_a.write_n_mb("file3_sixmegs", 6)

        inos = self._get_paths_to_ino()

        # Flush journal
        self.mount_a.umount_wait()
        self.fs.mds_asok(["flush", "journal"])

        self.mount_a.umount_wait()

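        # Nothing is wrong yet, so a repair scrub must not report any inode
        # table repairs (hence invert_match).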
        with self.assert_cluster_log("inode table repaired", invert_match=True):
            self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])

        self.mds_cluster.mds_stop()
        self.mds_cluster.mds_fail()

        # Truncate the journal (to ensure the inotable on disk
        # is all that will be in the InoTable in memory)

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file2_sixmegs"]),
                              "summary"])

        self.fs.journal_tool(["event", "splice",
                              "--inode={0}".format(inos["./file3_sixmegs"]),
                              "summary"])

        # Revert to the old inotable.
        for key, value in inotable_copy.items():
            self.fs.put_metadata_object_raw(key, value)
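        # The journal events for file2/file3 are gone and the old inotable is
        # back, so their inode numbers now look free even though the inodes
        # exist; the repair scrub below should detect this and mark them used.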

        self.mds_cluster.mds_restart()
        self.fs.wait_for_daemons()

        with self.assert_cluster_log("inode table repaired"):
            self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])

        self.mds_cluster.mds_stop()
        table_text = self.fs.table_tool(["0", "show", "inode"])
        table = json.loads(table_text)
        self.assertGreater(
            table['0']['data']['inotable']['free'][0]['start'],
            inos['./file3_sixmegs'])

    def test_backtrace_repair(self):
        """
        That the MDS can repair an inode's backtrace in the data pool
        if it is found to be damaged.
        """
        # Create a file for subsequent checks
        self.mount_a.run_shell(["mkdir", "parent_a"])
        self.mount_a.run_shell(["touch", "parent_a/alpha"])
        file_ino = self.mount_a.path_to_ino("parent_a/alpha")

        # The backtrace and layout should be written after the initial flush
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])

        # Go corrupt the backtrace
        self.fs._write_data_xattr(file_ino, "parent",
                                  "oh i'm sorry did i overwrite your xattr?")

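        # The backtrace lives in the "parent" xattr of the inode's first
        # data-pool object, so it is now garbage; a repair scrub should
        # flag the damage and rewrite it.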
        with self.assert_cluster_log("bad backtrace on inode"):
            self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
        self.fs.mds_asok(["flush", "journal"])
        backtrace = self.fs.read_backtrace(file_ino)
        self.assertEqual(['alpha', 'parent_a'],
                         [a['dname'] for a in backtrace['ancestors']])