X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=yardstick%2Fnetwork_services%2Fnfvi%2Fresource.py;h=5922bd3b93adebdfd855385e7b94b44d8fa9e7ad;hb=0a4b61c2284fb984f9c0d779682bf812219b8e7a;hp=adf4d8ae6eea5aa4578dad1de1e54139dc41efdd;hpb=7cdbac22a305aabf11d1e99afaf890d159c2257e;p=yardstick.git diff --git a/yardstick/network_services/nfvi/resource.py b/yardstick/network_services/nfvi/resource.py index adf4d8ae6..5922bd3b9 100644 --- a/yardstick/network_services/nfvi/resource.py +++ b/yardstick/network_services/nfvi/resource.py @@ -13,27 +13,25 @@ # limitations under the License. """ Resource collection definitions """ -from __future__ import absolute_import -from __future__ import print_function - -import logging -from itertools import chain - import errno -import jinja2 +from itertools import chain +import logging +import multiprocessing import os import os.path import re -import multiprocessing -import pkg_resources +import jinja2 +import pkg_resources from oslo_config import cfg from oslo_utils.encodeutils import safe_decode from yardstick import ssh +from yardstick.common.exceptions import ResourceCommandError from yardstick.common.task_template import finalize_for_yaml from yardstick.common.utils import validate_non_string_sequence from yardstick.network_services.nfvi.collectd import AmqpConsumer +from yardstick.benchmark.contexts import heat LOG = logging.getLogger(__name__) @@ -55,7 +53,8 @@ class ResourceProfile(object): DEFAULT_TIMEOUT = 3600 OVS_SOCKET_PATH = "/usr/local/var/run/openvswitch/db.sock" - def __init__(self, mgmt, port_names=None, plugins=None, interval=None, timeout=None): + def __init__(self, mgmt, port_names=None, plugins=None, + interval=None, timeout=None, reset_mq_flag=True): if plugins is None: self.plugins = {} @@ -80,6 +79,7 @@ class ResourceProfile(object): # we need to save mgmt so we can connect to port 5672 self.mgmt = mgmt self.connection = ssh.AutoConnectSSH.from_node(mgmt) + self._reset_mq_flag = reset_mq_flag @classmethod def make_from_node(cls, node, timeout): @@ -90,9 +90,12 @@ class ResourceProfile(object): plugins = collectd_options.get("plugins", {}) interval = collectd_options.get("interval") - return cls(node, plugins=plugins, interval=interval, timeout=timeout) + reset_mq_flag = (False if node.get("ctx_type") == heat.HeatContext.__context_type__ + else True) + return cls(node, plugins=plugins, interval=interval, + timeout=timeout, reset_mq_flag=reset_mq_flag) - def check_if_sa_running(self, process): + def check_if_system_agent_running(self, process): """ verify if system agent is running """ try: err, pid, _ = self.connection.execute("pgrep -f %s" % process) @@ -101,7 +104,7 @@ class ResourceProfile(object): except OSError as e: if e.errno in {errno.ECONNRESET}: # if we can't connect to check, then we won't be able to connect to stop it - LOG.exception("can't connect to host to check collectd status") + LOG.exception("Can't connect to host to check %s status", process) return 1, None raise @@ -213,11 +216,14 @@ class ResourceProfile(object): if not self.enable: return {} + if self.check_if_system_agent_running("collectd")[0] != 0: + return {} + metric = {} while not self._queue.empty(): metric.update(self._queue.get()) - msg = self.parse_collectd_result(metric) - return msg + + return self.parse_collectd_result(metric) def _provide_config_file(self, config_file_path, nfvi_cfg, template_kwargs): template = pkg_resources.resource_string("yardstick.network_services.nfvi", @@ -253,59 +259,93 @@ class ResourceProfile(object): if status != 0: LOG.error("cannot find OVS socket %s", socket_path) + def _reset_rabbitmq(self, connection): + # Reset amqp queue + LOG.debug("reset and setup amqp to collect data from collectd") + # ensure collectd.conf.d exists to avoid error/warning + cmd_list = ["sudo mkdir -p /etc/collectd/collectd.conf.d", + "sudo service rabbitmq-server restart", + "sudo rabbitmqctl stop_app", + "sudo rabbitmqctl reset", + "sudo rabbitmqctl start_app", + "sudo rabbitmqctl add_user admin admin", + "sudo rabbitmqctl authenticate_user admin admin", + "sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'" + ] + + for cmd in cmd_list: + exit_status, _, stderr = connection.execute(cmd) + if exit_status != 0: + raise ResourceCommandError(command=cmd, stderr=stderr) + + def _check_rabbitmq_user(self, connection, user='admin'): + exit_status, stdout, _ = connection.execute("sudo rabbitmqctl list_users") + if exit_status == 0: + for line in stdout.split('\n')[1:]: + if line.split('\t')[0] == user: + return True + + def _set_rabbitmq_admin_user(self, connection): + LOG.debug("add admin user to amqp") + cmd_list = ["sudo rabbitmqctl add_user admin admin", + "sudo rabbitmqctl authenticate_user admin admin", + "sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'" + ] + + for cmd in cmd_list: + exit_status, stdout, stderr = connection.execute(cmd) + if exit_status != 0: + raise ResourceCommandError(command=cmd, stdout=stdout, stderr=stderr) + + def _start_rabbitmq(self, connection): + if self._reset_mq_flag: + self._reset_rabbitmq(connection) + else: + if not self._check_rabbitmq_user(connection): + self._set_rabbitmq_admin_user(connection) + + # check stdout for "sudo rabbitmqctl status" command + cmd = "sudo rabbitmqctl status" + _, stdout, stderr = connection.execute(cmd) + if not re.search("RabbitMQ", stdout): + LOG.error("rabbitmqctl status don't have RabbitMQ in running apps") + raise ResourceCommandError(command=cmd, stderr=stderr) + def _start_collectd(self, connection, bin_path): LOG.debug("Starting collectd to collect NFVi stats") - connection.execute('sudo pkill -x -9 collectd') collectd_path = os.path.join(bin_path, "collectd", "sbin", "collectd") config_file_path = os.path.join(bin_path, "collectd", "etc") - exit_status = connection.execute("which %s > /dev/null 2>&1" % collectd_path)[0] + self._prepare_collectd_conf(config_file_path) + + connection.execute('sudo pkill -x -9 collectd') + cmd = "which %s > /dev/null 2>&1" % collectd_path + exit_status, _, stderr = connection.execute(cmd) if exit_status != 0: - LOG.warning("%s is not present disabling", collectd_path) - # disable auto-provisioning because it requires Internet access - # collectd_installer = os.path.join(bin_path, "collectd.sh") - # provision_tool(connection, collectd) - # http_proxy = os.environ.get('http_proxy', '') - # https_proxy = os.environ.get('https_proxy', '') - # connection.execute("sudo %s '%s' '%s'" % ( - # collectd_installer, http_proxy, https_proxy)) - return + raise ResourceCommandError(command=cmd, stderr=stderr) + if "ovs_stats" in self.plugins: self._setup_ovs_stats(connection) LOG.debug("Starting collectd to collect NFVi stats") - # ensure collectd.conf.d exists to avoid error/warning - connection.execute("sudo mkdir -p /etc/collectd/collectd.conf.d") - self._prepare_collectd_conf(config_file_path) - - # Reset amqp queue - LOG.debug("reset and setup amqp to collect data from collectd") - connection.execute("sudo rm -rf /var/lib/rabbitmq/mnesia/rabbit*") - connection.execute("sudo service rabbitmq-server start") - connection.execute("sudo rabbitmqctl stop_app") - connection.execute("sudo rabbitmqctl reset") - connection.execute("sudo rabbitmqctl start_app") - connection.execute("sudo service rabbitmq-server restart") - - LOG.debug("Creating admin user for rabbitmq in order to collect data from collectd") - connection.execute("sudo rabbitmqctl delete_user guest") - connection.execute("sudo rabbitmqctl add_user admin admin") - connection.execute("sudo rabbitmqctl authenticate_user admin admin") - connection.execute("sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'") - LOG.debug("Start collectd service..... %s second timeout", self.timeout) # intel_pmu plug requires large numbers of files open, so try to set # ulimit -n to a large value - connection.execute("sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path, - timeout=self.timeout) + + cmd = "sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path + exit_status, _, stderr = connection.execute(cmd, timeout=self.timeout) + if exit_status != 0: + raise ResourceCommandError(command=cmd, stderr=stderr) + LOG.debug("Done") def initiate_systemagent(self, bin_path): """ Start system agent for NFVi collection on host """ if self.enable: try: + self._start_rabbitmq(self.connection) self._start_collectd(self.connection, bin_path) - except Exception: - LOG.exception("Exception during collectd start") + except ResourceCommandError as e: + LOG.exception("Exception during collectd and rabbitmq start: %s", str(e)) raise def start(self): @@ -327,7 +367,7 @@ class ResourceProfile(object): self.amqp_client.terminate() LOG.debug("Check if %s is running", agent) - status, pid = self.check_if_sa_running(agent) + status, pid = self.check_if_system_agent_running(agent) LOG.debug("status %s pid %s", status, pid) if status != 0: return @@ -335,5 +375,7 @@ class ResourceProfile(object): if pid: self.connection.execute('sudo kill -9 "%s"' % pid) self.connection.execute('sudo pkill -9 "%s"' % agent) - self.connection.execute('sudo service rabbitmq-server stop') - self.connection.execute("sudo rabbitmqctl stop_app") + + if self._reset_mq_flag: + self.connection.execute('sudo service rabbitmq-server stop') + self.connection.execute("sudo rabbitmqctl stop_app")