Merge "Runners crash on test startup."
[yardstick.git] / yardstick / network_services / nfvi / resource.py
index dc5c46a..5922bd3 100644 (file)
@@ -27,9 +27,11 @@ from oslo_config import cfg
 from oslo_utils.encodeutils import safe_decode
 
 from yardstick import ssh
+from yardstick.common.exceptions import ResourceCommandError
 from yardstick.common.task_template import finalize_for_yaml
 from yardstick.common.utils import validate_non_string_sequence
 from yardstick.network_services.nfvi.collectd import AmqpConsumer
+from yardstick.benchmark.contexts import heat
 
 
 LOG = logging.getLogger(__name__)
@@ -51,7 +53,8 @@ class ResourceProfile(object):
     DEFAULT_TIMEOUT = 3600
     OVS_SOCKET_PATH = "/usr/local/var/run/openvswitch/db.sock"
 
-    def __init__(self, mgmt, port_names=None, plugins=None, interval=None, timeout=None):
+    def __init__(self, mgmt, port_names=None, plugins=None,
+                 interval=None, timeout=None, reset_mq_flag=True):
 
         if plugins is None:
             self.plugins = {}
@@ -76,6 +79,7 @@ class ResourceProfile(object):
         # we need to save mgmt so we can connect to port 5672
         self.mgmt = mgmt
         self.connection = ssh.AutoConnectSSH.from_node(mgmt)
+        self._reset_mq_flag = reset_mq_flag
 
     @classmethod
     def make_from_node(cls, node, timeout):
@@ -86,7 +90,10 @@ class ResourceProfile(object):
         plugins = collectd_options.get("plugins", {})
         interval = collectd_options.get("interval")
 
-        return cls(node, plugins=plugins, interval=interval, timeout=timeout)
+        reset_mq_flag = (False if node.get("ctx_type") == heat.HeatContext.__context_type__
+                          else True)
+        return cls(node, plugins=plugins, interval=interval,
+                   timeout=timeout, reset_mq_flag=reset_mq_flag)
 
     def check_if_system_agent_running(self, process):
         """ verify if system agent is running """
@@ -209,11 +216,14 @@ class ResourceProfile(object):
         if not self.enable:
             return {}
 
+        if self.check_if_system_agent_running("collectd")[0] != 0:
+            return {}
+
         metric = {}
         while not self._queue.empty():
             metric.update(self._queue.get())
-        msg = self.parse_collectd_result(metric)
-        return msg
+
+        return self.parse_collectd_result(metric)
 
     def _provide_config_file(self, config_file_path, nfvi_cfg, template_kwargs):
         template = pkg_resources.resource_string("yardstick.network_services.nfvi",
@@ -249,59 +259,93 @@ class ResourceProfile(object):
         if status != 0:
             LOG.error("cannot find OVS socket %s", socket_path)
 
+    def _reset_rabbitmq(self, connection):
+        """Reset RabbitMQ and add the admin user; raises ResourceCommandError."""
+        LOG.debug("reset and setup amqp to collect data from collectd")
+        # first command ensures collectd.conf.d exists to avoid error/warning
+        cmd_list = ["sudo mkdir -p /etc/collectd/collectd.conf.d",
+                    "sudo service rabbitmq-server restart",
+                    "sudo rabbitmqctl stop_app",
+                    "sudo rabbitmqctl reset",
+                    "sudo rabbitmqctl start_app",
+                    "sudo rabbitmqctl add_user admin admin",
+                    "sudo rabbitmqctl authenticate_user admin admin",
+                    "sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'"
+                    ]
+
+        for cmd in cmd_list:
+            exit_status, _, stderr = connection.execute(cmd)
+            if exit_status != 0:
+                raise ResourceCommandError(command=cmd, stderr=stderr)
+
+    def _check_rabbitmq_user(self, connection, user='admin'):
+        """Return True if `user` appears in "rabbitmqctl list_users", else False."""
+        exit_status, stdout, _ = connection.execute("sudo rabbitmqctl list_users")
+        # first line is skipped (presumably a header); rows are tab-separated
+        rows = stdout.split('\n')[1:] if exit_status == 0 else []
+        return any(row.split('\t')[0] == user for row in rows)
+
+    def _set_rabbitmq_admin_user(self, connection):
+        """Create the RabbitMQ "admin" user and grant it full permissions on "/"."""
+        LOG.debug("add admin user to amqp")
+        commands = ("sudo rabbitmqctl add_user admin admin",
+                    "sudo rabbitmqctl authenticate_user admin admin",
+                    "sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'")
+
+        for command in commands:
+            status, out, err = connection.execute(command)
+            if status != 0:
+                raise ResourceCommandError(command=command, stdout=out, stderr=err)
+
+    def _start_rabbitmq(self, connection):
+        """Reset RabbitMQ or ensure its admin user exists, then verify its status."""
+        if self._reset_mq_flag:
+            self._reset_rabbitmq(connection)
+        elif not self._check_rabbitmq_user(connection):
+            self._set_rabbitmq_admin_user(connection)
+
+        # "rabbitmqctl status" output must mention RabbitMQ, otherwise fail
+        status_cmd = "sudo rabbitmqctl status"
+        _, out, err = connection.execute(status_cmd)
+        if re.search("RabbitMQ", out) is None:
+            LOG.error("rabbitmqctl status don't have RabbitMQ in running apps")
+            raise ResourceCommandError(command=status_cmd, stderr=err)
+
     def _start_collectd(self, connection, bin_path):
         LOG.debug("Starting collectd to collect NFVi stats")
-        connection.execute('sudo pkill -x -9 collectd')
         collectd_path = os.path.join(bin_path, "collectd", "sbin", "collectd")
         config_file_path = os.path.join(bin_path, "collectd", "etc")
-        exit_status = connection.execute("which %s > /dev/null 2>&1" % collectd_path)[0]
+        self._prepare_collectd_conf(config_file_path)
+
+        connection.execute('sudo pkill -x -9 collectd')
+        cmd = "which %s > /dev/null 2>&1" % collectd_path
+        exit_status, _, stderr = connection.execute(cmd)
         if exit_status != 0:
-            LOG.warning("%s is not present disabling", collectd_path)
-            # disable auto-provisioning because it requires Internet access
-            # collectd_installer = os.path.join(bin_path, "collectd.sh")
-            # provision_tool(connection, collectd)
-            # http_proxy = os.environ.get('http_proxy', '')
-            # https_proxy = os.environ.get('https_proxy', '')
-            # connection.execute("sudo %s '%s' '%s'" % (
-            #     collectd_installer, http_proxy, https_proxy))
-            return
+            raise ResourceCommandError(command=cmd, stderr=stderr)
+
         if "ovs_stats" in self.plugins:
             self._setup_ovs_stats(connection)
 
         LOG.debug("Starting collectd to collect NFVi stats")
-        # ensure collectd.conf.d exists to avoid error/warning
-        connection.execute("sudo mkdir -p /etc/collectd/collectd.conf.d")
-        self._prepare_collectd_conf(config_file_path)
-
-        # Reset amqp queue
-        LOG.debug("reset and setup amqp to collect data from collectd")
-        connection.execute("sudo rm -rf /var/lib/rabbitmq/mnesia/rabbit*")
-        connection.execute("sudo service rabbitmq-server start")
-        connection.execute("sudo rabbitmqctl stop_app")
-        connection.execute("sudo rabbitmqctl reset")
-        connection.execute("sudo rabbitmqctl start_app")
-        connection.execute("sudo service rabbitmq-server restart")
-
-        LOG.debug("Creating admin user for rabbitmq in order to collect data from collectd")
-        connection.execute("sudo rabbitmqctl delete_user guest")
-        connection.execute("sudo rabbitmqctl add_user admin admin")
-        connection.execute("sudo rabbitmqctl authenticate_user admin admin")
-        connection.execute("sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'")
-
         LOG.debug("Start collectd service..... %s second timeout", self.timeout)
         # intel_pmu plug requires large numbers of files open, so try to set
         # ulimit -n to a large value
-        connection.execute("sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path,
-                           timeout=self.timeout)
+
+        cmd = "sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path
+        exit_status, _, stderr = connection.execute(cmd, timeout=self.timeout)
+        if exit_status != 0:
+            raise ResourceCommandError(command=cmd, stderr=stderr)
+
         LOG.debug("Done")
 
     def initiate_systemagent(self, bin_path):
         """ Start system agent for NFVi collection on host """
         if self.enable:
             try:
+                self._start_rabbitmq(self.connection)
                 self._start_collectd(self.connection, bin_path)
-            except Exception:
-                LOG.exception("Exception during collectd start")
+            except ResourceCommandError as e:
+                LOG.exception("Exception during collectd and rabbitmq start: %s", str(e))
                 raise
 
     def start(self):
@@ -331,5 +375,7 @@ class ResourceProfile(object):
         if pid:
             self.connection.execute('sudo kill -9 "%s"' % pid)
         self.connection.execute('sudo pkill -9 "%s"' % agent)
-        self.connection.execute('sudo service rabbitmq-server stop')
-        self.connection.execute("sudo rabbitmqctl stop_app")
+
+        if self._reset_mq_flag:
+            self.connection.execute('sudo service rabbitmq-server stop')
+            self.connection.execute("sudo rabbitmqctl stop_app")