X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=yardstick%2Fnetwork_services%2Fnfvi%2Fresource.py;h=fef44e2079fb358203ea0139b776f40fb7b56ebf;hb=49677852f2bc690d235318d2208504241eef61a9;hp=7e8334c73bb5ff5ac4097fb79f2a7fb64f4408ff;hpb=0eccf0cb46496b4a0fc1865f2c2c40d226697853;p=yardstick.git diff --git a/yardstick/network_services/nfvi/resource.py b/yardstick/network_services/nfvi/resource.py index 7e8334c73..fef44e207 100644 --- a/yardstick/network_services/nfvi/resource.py +++ b/yardstick/network_services/nfvi/resource.py @@ -19,6 +19,7 @@ from __future__ import print_function import logging from itertools import chain +import errno import jinja2 import os import os.path @@ -72,7 +73,6 @@ class ResourceProfile(object): self.timeout = timeout self.enable = True - self.cores = validate_non_string_sequence(cores, default=[]) self._queue = multiprocessing.Queue() self.amqp_client = None self.port_names = validate_non_string_sequence(port_names, default=[]) @@ -83,8 +83,16 @@ class ResourceProfile(object): def check_if_sa_running(self, process): """ verify if system agent is running """ - status, pid, _ = self.connection.execute("pgrep -f %s" % process) - return status == 0, pid + try: + err, pid, _ = self.connection.execute("pgrep -f %s" % process) + # strip whitespace + return err, pid.strip() + except OSError as e: + if e.errno in {errno.ECONNRESET}: + # if we can't connect to check, then we won't be able to connect to stop it + LOG.exception("can't connect to host to check collectd status") + return 1, None + raise def run_collectd_amqp(self): """ run amqp consumer to collect the NFVi data """ @@ -137,7 +145,7 @@ class ResourceProfile(object): def parse_intel_pmu_stats(cls, key, value): return {''.join(str(v) for v in key): value.split(":")[1]} - def parse_collectd_result(self, metrics, core_list): + def parse_collectd_result(self, metrics): """ convert collectd data into json""" result = { "cpu": {}, @@ -158,11 +166,10 @@ class ResourceProfile(object): res_key0 = next(res_key_iter) res_key1 = next(res_key_iter) - if "cpu" in res_key0 or "intel_rdt" in res_key0: + if "cpu" in res_key0 or "intel_rdt" in res_key0 or "intel_pmu" in res_key0: cpu_key, name, metric, testcase = \ self.get_cpu_data(res_key0, res_key1, value) - if cpu_key in core_list: - result["cpu"].setdefault(cpu_key, {}).update({name: metric}) + result["cpu"].setdefault(cpu_key, {}).update({name: metric}) elif "memory" in res_key0: result["memory"].update({res_key1: value.split(":")[0]}) @@ -179,9 +186,6 @@ class ResourceProfile(object): elif "ovs_stats" in res_key0: result["ovs_stats"].update(self.parse_ovs_stats(key_split, value)) - elif "intel_pmu-all" in res_key0: - result["intel_pmu"].update(self.parse_intel_pmu_stats(res_key1, value)) - result["timestamp"] = testcase return result @@ -189,8 +193,9 @@ class ResourceProfile(object): def amqp_process_for_nfvi_kpi(self): """ amqp collect and return nfvi kpis """ if self.amqp_client is None and self.enable: - self.amqp_client = \ - multiprocessing.Process(target=self.run_collectd_amqp) + self.amqp_client = multiprocessing.Process( + name="AmqpClient-{}-{}".format(self.mgmt['ip'], os.getpid()), + target=self.run_collectd_amqp) self.amqp_client.start() def amqp_collect_nfvi_kpi(self): @@ -201,7 +206,7 @@ class ResourceProfile(object): metric = {} while not self._queue.empty(): metric.update(self._queue.get()) - msg = self.parse_collectd_result(metric, self.cores) + msg = self.parse_collectd_result(metric) return msg def _provide_config_file(self, config_file_path, nfvi_cfg, template_kwargs): @@ -245,6 +250,10 @@ class ResourceProfile(object): # connection.execute("sudo %s '%s' '%s'" % ( # collectd_installer, http_proxy, https_proxy)) return + if "intel_pmu" in self.plugins: + LOG.debug("Downloading event list for pmu_stats plugin") + cmd = 'sudo bash -c \'cd /opt/tempT/pmu-tools/; python event_download_local.py\'' + connection.execute(cmd) LOG.debug("Starting collectd to collect NFVi stats") # ensure collectd.conf.d exists to avoid error/warning connection.execute("sudo mkdir -p /etc/collectd/collectd.conf.d") @@ -265,8 +274,11 @@ class ResourceProfile(object): connection.execute("sudo rabbitmqctl authenticate_user admin admin") connection.execute("sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'") - LOG.debug("Start collectd service.....") - connection.execute("sudo %s" % collectd_path) + LOG.debug("Start collectd service..... %s second timeout", self.timeout) + # intel_pmu plug requires large numbers of files open, so try to set + # ulimit -n to a large value + connection.execute("sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path, + timeout=self.timeout) LOG.debug("Done") def initiate_systemagent(self, bin_path): @@ -292,13 +304,18 @@ class ResourceProfile(object): LOG.debug("Stop resource monitor...") if self.amqp_client is not None: + # we proper and try to join first + self.amqp_client.join(3) self.amqp_client.terminate() + LOG.debug("Check if %s is running", agent) status, pid = self.check_if_sa_running(agent) - if status == 0: + LOG.debug("status %s pid %s", status, pid) + if status != 0: return - self.connection.execute('sudo kill -9 %s' % pid) - self.connection.execute('sudo pkill -9 %s' % agent) + if pid: + self.connection.execute('sudo kill -9 "%s"' % pid) + self.connection.execute('sudo pkill -9 "%s"' % agent) self.connection.execute('sudo service rabbitmq-server stop') self.connection.execute("sudo rabbitmqctl stop_app")