Fix some bugs when testing opensds ansible
[stor4nfv.git] / src / ceph / qa / tasks / cephfs / test_scrub_checks.py
1 """
2 MDS admin socket scrubbing-related tests.
3 """
4 import json
5 import logging
6 import errno
7 import time
8 from teuthology.exceptions import CommandFailedError
9 import os
10 from tasks.cephfs.cephfs_test_case import CephFSTestCase
11
# Module-level logger, named after this module, used by the test case below.
log = logging.getLogger(__name__)
13
14
class TestScrubChecks(CephFSTestCase):
    """
    Run flush and scrub commands on the specified files in the filesystem. This
    task will run through a sequence of operations, but it is not comprehensive
    on its own -- it doesn't manipulate the mds cache state to test on both
    in- and out-of-memory parts of the hierarchy. So it's designed to be run
    multiple times within a single test run, so that the test can manipulate
    memory state.

    Usage:
    mds_scrub_checks:
      mds_rank: 0
      path: path/to/test/dir
      client: 0
      run_seq: [0-9]+

    Increment the run_seq on subsequent invocations within a single test run;
    it uses that value to generate unique folder and file names.

    In this class the MDS rank and test directory are hard-coded, and
    ``test_scrub_checks`` drives ``_checks`` with run_seq 0 and then 1.
    """

    MDSS_REQUIRED = 1
    CLIENTS_REQUIRED = 1

    def test_scrub_checks(self):
        """Run the flush/scrub sequence twice, with run_seq 0 and then 1."""
        self._checks(0)
        self._checks(1)

    def _checks(self, run_seq):
        """
        Issue a sequence of flush_path / scrub_path admin socket commands and
        validate each response.

        :param run_seq: sequence number of this invocation. Flushes of
                        pre-existing paths are only issued when it is 0, and
                        it is embedded in the names of the directory and file
                        created during the run.
        :raises AsokCommandFailedError: (via ``asok_command``) when a command
                        response does not match what is expected.
        """
        mds_rank = 0
        test_dir = "scrub_test_path"

        # Path of the test directory relative to the filesystem root; this is
        # the form handed to the MDS admin socket commands.
        abs_test_path = "/{0}".format(test_dir)

        log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
        # Path of the same directory as seen through the client mount.
        client_path = os.path.join(self.mount_a.mountpoint, test_dir)
        log.info("client_path: {0}".format(client_path))

        log.info("Cloning repo into place")
        repo_path = self.clone_repo(self.mount_a, client_path)

        # The two literals are joined *before* .format() is applied, so that
        # the placeholder in the first literal is substituted as well.
        log.info(("Initiating mds_scrub_checks on mds.{id_}, "
                  "test_path {path}, run_seq {seq}").format(
                      id_=mds_rank, path=abs_test_path, seq=run_seq))

        success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0)

        # A path that does not exist: both commands must report -ENOENT.
        nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path)
        self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep),
                          lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
        self.asok_command(mds_rank, "scrub_path {nep}".format(nep=nep),
                          lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))

        test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path)
        dirpath = "{repo_path}/suites".format(repo_path=test_repo_path)

        # A directory inside the cloned repo.
        if run_seq == 0:
            log.info("First run: flushing {dirpath}".format(dirpath=dirpath))
            command = "flush_path {dirpath}".format(dirpath=dirpath)
            self.asok_command(mds_rank, command, success_validator)
        command = "scrub_path {dirpath}".format(dirpath=dirpath)
        self.asok_command(mds_rank, command, success_validator)

        # A regular file inside the cloned repo.
        filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format(
            repo_path=test_repo_path)
        if run_seq == 0:
            log.info("First run: flushing {filepath}".format(filepath=filepath))
            command = "flush_path {filepath}".format(filepath=filepath)
            self.asok_command(mds_rank, command, success_validator)
        command = "scrub_path {filepath}".format(filepath=filepath)
        self.asok_command(mds_rank, command, success_validator)

        # This file is scrubbed without a preceding flush_path; the response
        # is expected to carry performed_validation == False.
        filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml". \
            format(repo_path=test_repo_path)
        command = "scrub_path {filepath}".format(filepath=filepath)
        self.asok_command(mds_rank, command,
                          lambda j, r: self.json_validator(j, r, "performed_validation",
                                                           False))

        # The filesystem root.
        if run_seq == 0:
            log.info("First run: flushing base dir /")
            command = "flush_path /"
            self.asok_command(mds_rank, command, success_validator)
        command = "scrub_path /"
        self.asok_command(mds_rank, command, success_validator)

        # Create a new directory through the client, then flush it through
        # the MDS. run_seq keeps the name unique across invocations.
        new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq)
        test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path,
                                                        i=run_seq)
        self.mount_a.run_shell(["mkdir", new_dir])
        command = "flush_path {dir}".format(dir=test_new_dir)
        self.asok_command(mds_rank, command, success_validator)

        # Same for a new file written through the client.
        new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path,
                                                     i=run_seq)
        test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path,
                                                          i=run_seq)
        self.mount_a.write_n_mb(new_file, 1)

        command = "flush_path {file}".format(file=test_new_file)
        self.asok_command(mds_rank, command, success_validator)

        # check that scrub fails on errors
        ino = self.mount_a.path_to_ino(new_file)
        # Name of the RADOS object manipulated below: hex inode number
        # followed by ".00000000".
        rados_obj_name = "{ino:x}.00000000".format(ino=ino)
        command = "scrub_path {file}".format(file=test_new_file)

        # Missing parent xattr -> ENODATA
        self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name())
        self.asok_command(mds_rank, command,
                          lambda j, r: self.json_validator(j, r, "return_code", -errno.ENODATA))

        # Missing object -> ENOENT
        self.fs.rados(["rm", rados_obj_name], pool=self.fs.get_data_pool_name())
        self.asok_command(mds_rank, command,
                          lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))

        command = "flush_path /"
        self.asok_command(mds_rank, command, success_validator)

    def test_scrub_repair(self):
        """
        Remove a dentry from a directory's dirfrag object behind the MDS's
        back, check that rmdir on the directory then fails, run
        ``scrub_path ... repair`` and check that rmdir succeeds afterwards.
        """
        mds_rank = 0
        test_dir = "scrub_repair_path"

        self.mount_a.run_shell(["sudo", "mkdir", test_dir])
        self.mount_a.run_shell(["sudo", "touch", "{0}/file".format(test_dir)])
        dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir))

        self.mount_a.umount_wait()

        # flush journal entries to dirfrag objects, and expire journal
        self.fs.mds_asok(['flush', 'journal'])
        self.fs.mds_stop()

        # remove the dentry from dirfrag, cause incorrect fragstat/rstat
        self.fs.rados(["rmomapkey", dir_objname, "file_head"],
                      pool=self.fs.get_metadata_pool_name())

        self.fs.mds_fail_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()
        self.mount_a.wait_until_mounted()

        # fragstat indicates the directory is not empty, rmdir should fail
        with self.assertRaises(CommandFailedError) as ar:
            self.mount_a.run_shell(["sudo", "rmdir", test_dir])
        self.assertEqual(ar.exception.exitstatus, 1)

        self.asok_command(mds_rank, "scrub_path /{0} repair".format(test_dir),
                          lambda j, r: self.json_validator(j, r, "return_code", 0))

        # wait a few second for background repair
        time.sleep(10)

        # fragstat should be fixed
        self.mount_a.run_shell(["sudo", "rmdir", test_dir])

    @staticmethod
    def json_validator(json_out, rc, element, expected_value):
        """
        Check one element of an admin socket JSON response.

        :param json_out: decoded JSON response, or None when the command
                         produced no output (see ``asok_command``).
        :param rc: exit status of the admin socket command.
        :param element: key to look up in ``json_out``.
        :param expected_value: value ``json_out[element]`` must equal.
        :return: ``(success, message)`` tuple.
        """
        if rc != 0:
            return False, "asok command returned error {rc}".format(rc=rc)
        # asok_command hands us None for an empty response; report that as a
        # validation failure rather than crashing on None.get().
        if json_out is None:
            return False, "asok command returned no JSON output"
        element_value = json_out.get(element)
        if element_value != expected_value:
            return False, "unexpectedly got {jv} instead of {ev}!".format(
                jv=element_value, ev=expected_value)
        return True, "Succeeded"

    def asok_command(self, mds_rank, command, validator):
        """
        Run an admin socket command against an MDS and validate its response.

        :param mds_rank: index into the list of active MDS names.
        :param command: command line as a single string; it is split on
                        whitespace before being sent.
        :param validator: callable taking ``(json_out, exit_status)`` and
                          returning a ``(success, message)`` tuple.
        :return: the decoded JSON response, or None if stdout was empty.
        :raises AsokCommandFailedError: if ``validator`` reports failure.
        """
        log.info("Running command '{command}'".format(command=command))

        command_list = command.split()

        # we just assume there's an active mds for every rank
        mds_id = self.fs.get_active_names()[mds_rank]
        # check_status=False: a non-zero exit status is passed to the
        # validator instead of being raised here.
        proc = self.fs.mon_manager.admin_socket('mds', mds_id,
                                                command_list, check_status=False)
        rout = proc.exitstatus
        sout = proc.stdout.getvalue()

        if sout.strip():
            jout = json.loads(sout)
        else:
            jout = None

        # Join the literals before calling .format() so '{command}' in the
        # first one is substituted too.
        log.info(("command '{command}' got response code "
                  "'{rout}' and stdout '{sout}'").format(
                      command=command, rout=rout, sout=sout))

        success, errstring = validator(jout, rout)

        if not success:
            raise AsokCommandFailedError(command, rout, jout, errstring)

        return jout

    def clone_repo(self, client_mount, path):
        """
        Make sure a clone of the ceph-qa-suite repository exists under
        ``path`` on the client mount.

        :param client_mount: mount object used to run shell commands.
        :param path: directory (created if missing) that holds the clone.
        :return: path of the repository directory inside ``path``.
        """
        repo = "ceph-qa-suite"
        repo_path = os.path.join(path, repo)
        client_mount.run_shell(["mkdir", "-p", path])

        # Only clone when the repository directory cannot be stat'ed, so a
        # second invocation reuses the existing checkout.
        try:
            client_mount.stat(repo_path)
        except CommandFailedError:
            client_mount.run_shell([
                "git", "clone", '--branch', 'giant',
                "http://github.com/ceph/{repo}".format(repo=repo),
                "{path}/{repo}".format(path=path, repo=repo)
            ])

        return repo_path
227
228
class AsokCommandFailedError(Exception):
    """
    Exception thrown when we get an unexpected response
    on an admin socket command
    """

    def __init__(self, command, rc, json_out, errstring):
        """
        :param command: the admin socket command that was run.
        :param rc: exit status of the command.
        :param json_out: decoded JSON output of the command (may be None).
        :param errstring: explanation of why the response was rejected.
        """
        super(AsokCommandFailedError, self).__init__(
            command, rc, json_out, errstring)
        self.command = command
        self.rc = rc
        self.json = json_out
        self.errstring = errstring

    def __str__(self):
        # The literals are concatenated inside the parentheses first, so that
        # .format() fills the placeholders of the whole message and not just
        # those of the last literal.
        return ("Admin socket: {command} failed with rc={rc},"
                "json output={json}, because '{es}'").format(
                    command=self.command, rc=self.rc,
                    json=self.json, es=self.errstring)
244                    command=self.command, rc=self.rc,
245                    json=self.json, es=self.errstring)