Merge "add collectd resource node capability"
[yardstick.git] / yardstick / network_services / nfvi / resource.py
index ce09b65..7e8334c 100644 (file)
 
 from __future__ import absolute_import
 from __future__ import print_function
-import tempfile
+
 import logging
+from itertools import chain
+
+import jinja2
 import os
 import os.path
 import re
 import multiprocessing
-from collections import Sequence
+import pkg_resources
 
 from oslo_config import cfg
+from oslo_utils.encodeutils import safe_decode
 
 from yardstick import ssh
+from yardstick.common.task_template import finalize_for_yaml
+from yardstick.common.utils import validate_non_string_sequence
 from yardstick.network_services.nfvi.collectd import AmqpConsumer
-from yardstick.network_services.utils import provision_tool
+from yardstick.network_services.utils import get_nsb_option
 
 LOG = logging.getLogger(__name__)
 
 CONF = cfg.CONF
 ZMQ_OVS_PORT = 5567
 ZMQ_POLLING_TIME = 12000
-LIST_PLUGINS_ENABLED = ["amqp", "cpu", "cpufreq", "intel_rdt", "memory",
-                        "hugepages", "dpdkstat", "virt", "ovs_stats"]
+LIST_PLUGINS_ENABLED = ["amqp", "cpu", "cpufreq", "memory",
+                        "hugepages"]
 
 
 class ResourceProfile(object):
     """
     This profile adds a resource at the beginning of the test session
     """
+    COLLECTD_CONF = "collectd.conf"
+    AMPQ_PORT = 5672
+    DEFAULT_INTERVAL = 25
+    DEFAULT_TIMEOUT = 3600
+
+    def __init__(self, mgmt, port_names=None, cores=None, plugins=None,
+                 interval=None, timeout=None):
+
+        if plugins is None:
+            self.plugins = {}
+        else:
+            self.plugins = plugins
+
+        if interval is None:
+            self.interval = self.DEFAULT_INTERVAL
+        else:
+            self.interval = interval
+
+        if timeout is None:
+            self.timeout = self.DEFAULT_TIMEOUT
+        else:
+            self.timeout = timeout
 
-    def __init__(self, mgmt, interfaces=None, cores=None):
         self.enable = True
-        self.connection = None
-        self.cores = cores if isinstance(cores, Sequence) else []
+        self.cores = validate_non_string_sequence(cores, default=[])
         self._queue = multiprocessing.Queue()
         self.amqp_client = None
-        self.interfaces = interfaces if isinstance(interfaces, Sequence) else []
+        self.port_names = validate_non_string_sequence(port_names, default=[])
 
-        # why the host or ip?
-        self.vnfip = mgmt.get("host", mgmt["ip"])
-        self.connection = ssh.SSH.from_node(mgmt, overrides={"ip": self.vnfip})
-
-        self.connection.wait()
+        # we need to save mgmt so we can connect to port 5672
+        self.mgmt = mgmt
+        self.connection = ssh.AutoConnectSSH.from_node(mgmt)
 
     def check_if_sa_running(self, process):
         """ verify if system agent is running """
-        err, pid, _ = self.connection.execute("pgrep -f %s" % process)
-        return [err == 0, pid]
+        status, pid, _ = self.connection.execute("pgrep -f %s" % process)
+        return status == 0, pid
 
     def run_collectd_amqp(self):
         """ run amqp consumer to collect the NFVi data """
-        amqp_url = 'amqp://admin:admin@{}:5672/%2F'.format(self.vnfip)
+        amqp_url = 'amqp://admin:admin@{}:{}/%2F'.format(self.mgmt['ip'], self.AMPQ_PORT)
         amqp = AmqpConsumer(amqp_url, self._queue)
         try:
             amqp.run()
@@ -73,18 +97,18 @@ class ResourceProfile(object):
 
     @classmethod
     def parse_simple_resource(cls, key, value):
-        return {'/'.join(key): value.split(":")[1]}
+        reskey = "/".join(rkey for rkey in key if "nsb_stats" not in rkey)
+        return {reskey: value.split(":")[1]}
 
     @classmethod
-    def get_cpu_data(cls, key_split, value):
+    def get_cpu_data(cls, res_key0, res_key1, value):
         """ Get cpu topology of the host """
         pattern = r"-(\d+)"
-        if "cpufreq" in key_split[0]:
-            metric = key_split[0]
-            source = key_split[1]
+
+        if 'cpufreq' in res_key0:
+            metric, source = res_key0, res_key1
         else:
-            metric = key_split[1]
-            source = key_split[0]
+            metric, source = res_key1, res_key0
 
         match = re.search(pattern, source, re.MULTILINE)
         if not match:
@@ -109,6 +133,10 @@ class ResourceProfile(object):
    def parse_ovs_stats(cls, key, value):
        # ovs_stats metrics use the same flat key/value scheme as simple
        # resources, so delegate straight to parse_simple_resource.
        return cls.parse_simple_resource(key, value)
 
+    @classmethod
+    def parse_intel_pmu_stats(cls, key, value):
+        return {''.join(str(v) for v in key): value.split(":")[1]}
+
     def parse_collectd_result(self, metrics, core_list):
         """ convert collectd data into json"""
         result = {
@@ -118,17 +146,21 @@ class ResourceProfile(object):
             "dpdkstat": {},
             "virt": {},
             "ovs_stats": {},
+            "intel_pmu": {},
         }
         testcase = ""
 
-        for key, value in metrics.items():
+        # unicode decode
+        decoded = ((safe_decode(k, 'utf-8'), safe_decode(v, 'utf-8')) for k, v in metrics.items())
+        for key, value in decoded:
             key_split = key.split("/")
             res_key_iter = (key for key in key_split if "nsb_stats" not in key)
             res_key0 = next(res_key_iter)
             res_key1 = next(res_key_iter)
 
             if "cpu" in res_key0 or "intel_rdt" in res_key0:
-                cpu_key, name, metric, testcase = self.get_cpu_data(key_split, value)
+                cpu_key, name, metric, testcase = \
+                    self.get_cpu_data(res_key0, res_key1, value)
                 if cpu_key in core_list:
                     result["cpu"].setdefault(cpu_key, {}).update({name: metric})
 
@@ -136,16 +168,19 @@ class ResourceProfile(object):
                 result["memory"].update({res_key1: value.split(":")[0]})
 
             elif "hugepages" in res_key0:
-                result["hugepages"].update(self.parse_hugepages(key, value))
+                result["hugepages"].update(self.parse_hugepages(key_split, value))
 
             elif "dpdkstat" in res_key0:
-                result["dpdkstat"].update(self.parse_dpdkstat(key, value))
+                result["dpdkstat"].update(self.parse_dpdkstat(key_split, value))
 
             elif "virt" in res_key1:
-                result["virt"].update(self.parse_virt(key, value))
+                result["virt"].update(self.parse_virt(key_split, value))
 
             elif "ovs_stats" in res_key0:
-                result["ovs_stats"].update(self.parse_ovs_stats(key, value))
+                result["ovs_stats"].update(self.parse_ovs_stats(key_split, value))
+
+            elif "intel_pmu-all" in res_key0:
+                result["intel_pmu"].update(self.parse_intel_pmu_stats(res_key1, value))
 
         result["timestamp"] = testcase
 
@@ -153,52 +188,67 @@ class ResourceProfile(object):
 
     def amqp_process_for_nfvi_kpi(self):
         """ amqp collect and return nfvi kpis """
-        if self.amqp_client is None:
+        if self.amqp_client is None and self.enable:
             self.amqp_client = \
                 multiprocessing.Process(target=self.run_collectd_amqp)
             self.amqp_client.start()
 
     def amqp_collect_nfvi_kpi(self):
         """ amqp collect and return nfvi kpis """
+        if not self.enable:
+            return {}
+
         metric = {}
         while not self._queue.empty():
             metric.update(self._queue.get())
         msg = self.parse_collectd_result(metric, self.cores)
         return msg
 
-    def _provide_config_file(self, bin_path, nfvi_cfg, kwargs):
-        with open(os.path.join(bin_path, nfvi_cfg), 'r') as cfg:
-            template = cfg.read()
-        cfg, cfg_content = tempfile.mkstemp()
-        with os.fdopen(cfg, "w+") as cfg:
-            cfg.write(template.format(**kwargs))
-        cfg_file = os.path.join(bin_path, nfvi_cfg)
-        self.connection.put(cfg_content, cfg_file)
-
-    def _prepare_collectd_conf(self, bin_path):
+    def _provide_config_file(self, config_file_path, nfvi_cfg, template_kwargs):
+        template = pkg_resources.resource_string("yardstick.network_services.nfvi",
+                                                 nfvi_cfg).decode('utf-8')
+        cfg_content = jinja2.Template(template, trim_blocks=True, lstrip_blocks=True,
+                                      finalize=finalize_for_yaml).render(
+            **template_kwargs)
+        # cfg_content = io.StringIO(template.format(**template_kwargs))
+        cfg_file = os.path.join(config_file_path, nfvi_cfg)
+        # must write as root, so use sudo
+        self.connection.execute("cat | sudo tee {}".format(cfg_file), stdin=cfg_content)
+
+    def _prepare_collectd_conf(self, config_file_path):
         """ Prepare collectd conf """
-        loadplugin = "\n".join("LoadPlugin {0}".format(plugin)
-                               for plugin in LIST_PLUGINS_ENABLED)
-
-        interfaces = "\n".join("PortName '{0[name]}'".format(interface)
-                               for interface in self.interfaces)
 
         kwargs = {
-            "interval": '25',
-            "loadplugin": loadplugin,
-            "dpdk_interface": interfaces,
+            "interval": self.interval,
+            "loadplugins": set(chain(LIST_PLUGINS_ENABLED, self.plugins.keys())),
+            # Optional fields PortName is descriptive only, use whatever is present
+            "port_names": self.port_names,
+            # "ovs_bridge_interfaces": ["br-int"],
+            "plugins": self.plugins,
         }
-
-        self._provide_config_file(bin_path, 'collectd.conf', kwargs)
+        self._provide_config_file(config_file_path, self.COLLECTD_CONF, kwargs)
 
     def _start_collectd(self, connection, bin_path):
         LOG.debug("Starting collectd to collect NFVi stats")
-        # temp disable
-        return
-        connection.execute('sudo pkill -9 collectd')
-        collectd = os.path.join(bin_path, "collectd.sh")
-        provision_tool(connection, collectd)
-        self._prepare_collectd_conf(bin_path)
+        connection.execute('sudo pkill -x -9 collectd')
+        bin_path = get_nsb_option("bin_path")
+        collectd_path = os.path.join(bin_path, "collectd", "sbin", "collectd")
+        config_file_path = os.path.join(bin_path, "collectd", "etc")
+        exit_status = connection.execute("which %s > /dev/null 2>&1" % collectd_path)[0]
+        if exit_status != 0:
+            LOG.warning("%s is not present disabling", collectd_path)
+            # disable auto-provisioning because it requires Internet access
+            # collectd_installer = os.path.join(bin_path, "collectd.sh")
+            # provision_tool(connection, collectd)
+            # http_proxy = os.environ.get('http_proxy', '')
+            # https_proxy = os.environ.get('https_proxy', '')
+            # connection.execute("sudo %s '%s' '%s'" % (
+            #     collectd_installer, http_proxy, https_proxy))
+            return
+        LOG.debug("Starting collectd to collect NFVi stats")
+        # ensure collectd.conf.d exists to avoid error/warning
+        connection.execute("sudo mkdir -p /etc/collectd/collectd.conf.d")
+        self._prepare_collectd_conf(config_file_path)
 
         # Reset amqp queue
         LOG.debug("reset and setup amqp to collect data from collectd")
@@ -209,21 +259,24 @@ class ResourceProfile(object):
         connection.execute("sudo rabbitmqctl start_app")
         connection.execute("sudo service rabbitmq-server restart")
 
-        # Run collectd
+        LOG.debug("Creating admin user for rabbitmq in order to collect data from collectd")
+        connection.execute("sudo rabbitmqctl delete_user guest")
+        connection.execute("sudo rabbitmqctl add_user admin admin")
+        connection.execute("sudo rabbitmqctl authenticate_user admin admin")
+        connection.execute("sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'")
 
-        http_proxy = os.environ.get('http_proxy', '')
-        https_proxy = os.environ.get('https_proxy', '')
-        connection.execute("sudo %s '%s' '%s'" %
-                           (collectd, http_proxy, https_proxy))
         LOG.debug("Start collectd service.....")
-        connection.execute(
-            "sudo %s" % os.path.join(bin_path, "collectd", "collectd"))
+        connection.execute("sudo %s" % collectd_path)
         LOG.debug("Done")
 
     def initiate_systemagent(self, bin_path):
         """ Start system agent for NFVi collection on host """
         if self.enable:
-            self._start_collectd(self.connection, bin_path)
+            try:
+                self._start_collectd(self.connection, bin_path)
+            except Exception:
+                LOG.exception("Exception during collectd start")
+                raise
 
     def start(self):
         """ start nfvi collection """