X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fqa%2Ftasks%2Fcheck_counter.py;fp=src%2Fceph%2Fqa%2Ftasks%2Fcheck_counter.py;h=a3d84e00fe1163f793c4b711e350e1d98bf8519c;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/qa/tasks/check_counter.py b/src/ceph/qa/tasks/check_counter.py new file mode 100644 index 0000000..a3d84e0 --- /dev/null +++ b/src/ceph/qa/tasks/check_counter.py @@ -0,0 +1,96 @@ + +import logging +import json + +from teuthology.task import Task +from teuthology import misc +import ceph_manager + +log = logging.getLogger(__name__) + + +class CheckCounter(Task): + """ + Use this task to validate that some daemon perf counters were + incremented by the nested tasks. + + Config: + 'cluster_name': optional, specify which cluster + 'target': dictionary of daemon type to list of performance counters. + 'dry_run': just log the value of the counters, don't fail if they + aren't nonzero. + + Success condition is that for all of the named counters, at least + one of the daemons of that type has the counter nonzero. + + Example to check cephfs dirfrag splits are happening: + - install: + - ceph: + - ceph-fuse: + - check-counter: + counters: + mds: + - "mds.dir_split" + - workunit: ... + """ + + def start(self): + log.info("START") + + def end(self): + cluster_name = self.config.get('cluster_name', None) + dry_run = self.config.get('dry_run', False) + targets = self.config.get('counters', {}) + + if cluster_name is None: + cluster_name = self.ctx.managers.keys()[0] + + for daemon_type, counters in targets.items(): + # List of 'a', 'b', 'c'... + daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type)) + daemons = dict([(daemon_id, + self.ctx.daemons.get_daemon(daemon_type, daemon_id)) + for daemon_id in daemon_ids]) + + seen = set() + + for daemon_id, daemon in daemons.items(): + if not daemon.running(): + log.info("Ignoring daemon {0}, it isn't running".format(daemon_id)) + continue + else: + log.debug("Getting stats from {0}".format(daemon_id)) + + manager = self.ctx.managers[cluster_name] + proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"]) + response_data = proc.stdout.getvalue().strip() + if response_data: + perf_dump = json.loads(response_data) + else: + log.warning("No admin socket response from {0}, skipping".format(daemon_id)) + continue + + for counter in counters: + subsys, counter_id = counter.split(".") + if subsys not in perf_dump or counter_id not in perf_dump[subsys]: + log.warning("Counter '{0}' not found on daemon {1}.{2}".format( + counter, daemon_type, daemon_id)) + continue + value = perf_dump[subsys][counter_id] + + log.info("Daemon {0}.{1} {2}={3}".format( + daemon_type, daemon_id, counter, value + )) + + if value > 0: + seen.add(counter) + + if not dry_run: + unseen = set(counters) - set(seen) + if unseen: + raise RuntimeError("The following counters failed to be set " + "on {0} daemons: {1}".format( + daemon_type, unseen + )) + +task = CheckCounter