1 # Copyright (c) 2016-2017 Intel Corporation
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 """ Resource collection definitions """
17 from itertools import chain
19 import multiprocessing
26 from oslo_config import cfg
27 from oslo_utils.encodeutils import safe_decode
29 from yardstick import ssh
30 from yardstick.common.exceptions import ResourceCommandError
31 from yardstick.common.task_template import finalize_for_yaml
32 from yardstick.common.utils import validate_non_string_sequence
33 from yardstick.network_services.nfvi.collectd import AmqpConsumer
34 from yardstick.benchmark.contexts import heat
# Module-level logger plus collectd-related constants.
# LIST_PLUGINS_ENABLED is merged with the user-supplied plugin names when the
# collectd configuration is rendered (see its use with itertools.chain below).
# NOTE(review): the list literal is cut off in this extract - no closing
# bracket or trailing entries are visible. Restore them from the complete file
# rather than guessing. The "logging" import is likewise not visible here.
37 LOG = logging.getLogger(__name__)
41 ZMQ_POLLING_TIME = 12000
42 LIST_PLUGINS_ENABLED = ["amqp", "cpu", "cpufreq", "memory",
# ResourceProfile: drives collectd and RabbitMQ on a host over SSH and turns
# the metrics received over AMQP into a result dictionary.
# Class-level values visible here: the collectd config file name, a second
# directory the config is also written to, the default collectd start
# timeout and the default Open vSwitch DB socket path.
# NOTE(review): AMPQ_PORT and DEFAULT_INTERVAL are referenced by methods of
# this class but their definitions are not visible in this extract; the
# docstring delimiters around the sentence below are missing as well.
46 class ResourceProfile(object):
48 This profile adds a resource at the beginning of the test session
50 COLLECTD_CONF = "collectd.conf"
51 BAR_COLLECTD_CONF_PATH = "/opt/collectd/etc/collectd.conf.d/"
54 DEFAULT_TIMEOUT = 3600
55 OVS_SOCKET_PATH = "/usr/local/var/run/openvswitch/db.sock"
# Constructor: records the collection settings, creates the queue that the
# AMQP consumer fills, and builds the SSH connection object for the host.
#   mgmt          - node description handed to ssh.AutoConnectSSH.from_node()
#   port_names    - run through validate_non_string_sequence(); default is []
#   plugins, interval, timeout - stored on the instance
#   reset_mq_flag - stored as self._reset_mq_flag; other methods test it
#                   before resetting or stopping RabbitMQ
# NOTE(review): lines are missing from this extract. interval and timeout are
# each assigned twice below (class default, then the argument); presumably
# these are the two arms of "is None" checks that are not visible. self.mgmt
# and self.enable are read by other methods but their assignments are not
# visible either - confirm against the complete file.
57 def __init__(self, mgmt, port_names=None, plugins=None,
58 interval=None, timeout=None, reset_mq_flag=True):
63 self.plugins = plugins
66 self.interval = self.DEFAULT_INTERVAL
68 self.interval = interval
71 self.timeout = self.DEFAULT_TIMEOUT
73 self.timeout = timeout
76 self._queue = multiprocessing.Queue()
77 self.amqp_client = None
78 self.port_names = validate_non_string_sequence(port_names, default=[])
80 # we need to save mgmt so we can connect to port 5672
82 self.connection = ssh.AutoConnectSSH.from_node(mgmt)
83 self._reset_mq_flag = reset_mq_flag
# Alternate constructor: builds an instance from a node dictionary.
#   node    - must contain a "collectd" entry (plain subscript, so a missing
#             key raises KeyError); its optional "plugins" (default {}) and
#             "interval" (default None) entries are forwarded
#   timeout - forwarded unchanged
# reset_mq_flag is False when node["ctx_type"] equals the Heat context type.
# NOTE(review): the else-arm of the conditional expression and any decorator
# are not visible in this extract. The "cls" first parameter suggests a
# classmethod - confirm against the complete file.
86 def make_from_node(cls, node, timeout):
87 # node dict works as mgmt dict
88 # don't need port names, there is no way we can
89 # tell what port is used on the compute node
90 collectd_options = node["collectd"]
91 plugins = collectd_options.get("plugins", {})
92 interval = collectd_options.get("interval")
94 reset_mq_flag = (False if node.get("ctx_type") == heat.HeatContext.__context_type__
96 return cls(node, plugins=plugins, interval=interval,
97 timeout=timeout, reset_mq_flag=reset_mq_flag)
# Runs "pgrep -f <process>" on the host over the SSH connection and returns a
# pair: the first element of execute()'s result (used by callers as an exit
# status) and the whitespace-stripped second element (the pgrep output).
# NOTE(review): "process" is interpolated into the shell command unquoted.
# NOTE(review): the try/except lines and whatever follows the log call are
# not visible in this extract; only the errno test of the handler survives,
# so the value returned on a connection reset cannot be stated from here.
99 def check_if_system_agent_running(self, process):
100 """ verify if system agent is running """
102 err, pid, _ = self.connection.execute("pgrep -f %s" % process)
104 return err, pid.strip()
106 if e.errno in {errno.ECONNRESET}:
107 # if we can't connect to check, then we won't be able to connect to stop it
108 LOG.exception("Can't connect to host to check %s status", process)
# Builds the AMQP URL from the management IP and AMPQ_PORT (credentials
# admin:admin, vhost "/" written as %2F) and creates an AmqpConsumer that is
# given self._queue. This method is the target of the process created by
# amqp_process_for_nfvi_kpi().
# NOTE(review): the statement guarded by the except clause and the handler
# body are not visible in this extract; presumably the consumer is run and
# then stopped on these exceptions - confirm against the complete file.
112 def run_collectd_amqp(self):
113 """ run amqp consumer to collect the NFVi data """
114 amqp_url = 'amqp://admin:admin@{}:{}/%2F'.format(self.mgmt['ip'], self.AMPQ_PORT)
115 amqp = AmqpConsumer(amqp_url, self._queue)
118 except (AttributeError, RuntimeError, KeyboardInterrupt):
def parse_simple_resource(cls, key, value):
    """Build a one-entry result dict for a simple collectd metric.

    :param key: iterable of key components (a collectd key split on "/")
    :param value: string with at least two ":"-separated fields
    :return: dict mapping the "/"-joined components that do not contain
             "nsb_stats" to the second ":"-separated field of ``value``
    :raises IndexError: if ``value`` contains no ":"
    """
    # Components containing "nsb_stats" are dropped from the resulting key.
    kept_parts = [part for part in key if "nsb_stats" not in part]
    return {"/".join(kept_parts): value.split(":")[1]}
# Splits one cpu-style collectd sample into (cpu id, metric name, value, time).
# When res_key0 contains 'cpufreq' the metric name is res_key0 and the id is
# searched for in res_key1; otherwise the roles are swapped. The id is group 1
# of the regex match on "source". "value" is split on ":" into exactly two
# parts (time first), so any other number of ":" raises ValueError.
# NOTE(review): the definition of "pattern", the else line and the condition
# guarding the ("error", "Invalid", "", "") return are not visible in this
# extract; presumably that return is taken when the search finds no match.
127 def get_cpu_data(cls, res_key0, res_key1, value):
128 """ Get cpu topology of the host """
131 if 'cpufreq' in res_key0:
132 metric, source = res_key0, res_key1
134 metric, source = res_key1, res_key0
136 match = re.search(pattern, source, re.MULTILINE)
138 return "error", "Invalid", "", ""
140 time, value = value.split(":")
141 return str(match.group(1)), metric, value, time
def parse_hugepages(cls, key, value):
    """Parse a hugepages sample by delegating to ``cls.parse_simple_resource``.

    :param key: iterable of key components
    :param value: raw sample string
    :return: whatever ``cls.parse_simple_resource(key, value)`` returns
    """
    return cls.parse_simple_resource(key, value)
def parse_dpdkstat(cls, key, value):
    """Parse a dpdkstat sample by delegating to ``cls.parse_simple_resource``.

    :param key: iterable of key components
    :param value: raw sample string
    :return: whatever ``cls.parse_simple_resource(key, value)`` returns
    """
    return cls.parse_simple_resource(key, value)
def parse_virt(cls, key, value):
    """Parse a virt sample by delegating to ``cls.parse_simple_resource``.

    :param key: iterable of key components
    :param value: raw sample string
    :return: whatever ``cls.parse_simple_resource(key, value)`` returns
    """
    return cls.parse_simple_resource(key, value)
def parse_ovs_stats(cls, key, value):
    """Parse an ovs_stats sample by delegating to ``cls.parse_simple_resource``.

    :param key: iterable of key components
    :param value: raw sample string
    :return: whatever ``cls.parse_simple_resource(key, value)`` returns
    """
    return cls.parse_simple_resource(key, value)
def parse_intel_pmu_stats(cls, key, value):
    """Build a one-entry result dict for an intel_pmu sample.

    Unlike the simple-resource parser, the key components are concatenated
    without a separator and none of them is filtered out.

    :param key: iterable of key components (each is converted with ``str``)
    :param value: string with at least two ":"-separated fields
    :return: dict mapping the concatenated key to the second field of ``value``
    :raises IndexError: if ``value`` contains no ":"
    """
    flat_key = "".join(map(str, key))
    return {flat_key: value.split(":")[1]}
# Converts the raw {key: value} metrics collected from the queue into the
# nested "result" dictionary, one section per resource family.
# Each key and value is decoded as UTF-8; the key is split on "/" and the
# first two components that do not contain "nsb_stats" become res_key0 and
# res_key1 (next() raises StopIteration when fewer than two remain).
# Dispatch is by substring test on res_key0, except for the virt branch,
# which tests res_key1.
# NOTE(review): keys containing "intel_pmu" are routed through get_cpu_data();
# parse_intel_pmu_stats() is not called from this method.
# NOTE(review): the initialisation of "result" and "testcase" and the final
# return are not visible in this extract. "testcase" is the fourth element
# returned by get_cpu_data() (the time field of the sample), so
# result["timestamp"] carries the time of the last cpu-style sample seen.
163 def parse_collectd_result(self, metrics):
164 """ convert collectd data into json"""
176 decoded = ((safe_decode(k, 'utf-8'), safe_decode(v, 'utf-8')) for k, v in metrics.items())
177 for key, value in decoded:
178 key_split = key.split("/")
179 res_key_iter = (key for key in key_split if "nsb_stats" not in key)
180 res_key0 = next(res_key_iter)
181 res_key1 = next(res_key_iter)
183 if "cpu" in res_key0 or "intel_rdt" in res_key0 or "intel_pmu" in res_key0:
184 cpu_key, name, metric, testcase = \
185 self.get_cpu_data(res_key0, res_key1, value)
186 result["cpu"].setdefault(cpu_key, {}).update({name: metric})
188 elif "memory" in res_key0:
189 result["memory"].update({res_key1: value.split(":")[0]})
191 elif "hugepages" in res_key0:
192 result["hugepages"].update(self.parse_hugepages(key_split, value))
194 elif "dpdkstat" in res_key0:
195 result["dpdkstat"].update(self.parse_dpdkstat(key_split, value))
197 elif "virt" in res_key1:
198 result["virt"].update(self.parse_virt(key_split, value))
200 elif "ovs_stats" in res_key0:
201 result["ovs_stats"].update(self.parse_ovs_stats(key_split, value))
203 result["timestamp"] = testcase
def amqp_process_for_nfvi_kpi(self):
    """Start the background AMQP consumer process, at most once.

    Does nothing when ``self.amqp_client`` is already set or when
    ``self.enable`` is falsy. Otherwise a ``multiprocessing.Process``
    targeting ``self.run_collectd_amqp`` is created, stored in
    ``self.amqp_client`` and started.

    :return: None (KPIs are not returned by this method)
    """
    if self.amqp_client is not None or not self.enable:
        return

    # The process name carries the management IP and the parent PID.
    process_name = "AmqpClient-{}-{}".format(self.mgmt['ip'], os.getpid())
    self.amqp_client = multiprocessing.Process(
        name=process_name, target=self.run_collectd_amqp)
    self.amqp_client.start()
# Drains every item currently in self._queue into the "metric" dict (later
# items overwrite earlier ones with the same key) and returns the result of
# parse_collectd_result() on it.
# NOTE(review): the initialisation of "metric" and the statement taken when
# the collectd check reports a non-zero status are not visible in this
# extract; presumably the method returns early in that case.
215 def amqp_collect_nfvi_kpi(self):
216 """ amqp collect and return nfvi kpis """
220 if self.check_if_system_agent_running("collectd")[0] != 0:
224 while not self._queue.empty():
225 metric.update(self._queue.get())
227 return self.parse_collectd_result(metric)
# Renders the packaged template named nfvi_cfg and writes the result to
# <config_file_path>/<nfvi_cfg> on the remote host.
# The template text is loaded from the "yardstick.network_services.nfvi"
# package and rendered with jinja2, using finalize_for_yaml as the finalize
# hook. The rendered text is fed on stdin to "cat | sudo tee <file>".
# NOTE(review): the arguments passed to render() are not visible in this
# extract; presumably template_kwargs is expanded there - confirm.
# NOTE(review): cfg_file is interpolated into the shell command unquoted.
229 def _provide_config_file(self, config_file_path, nfvi_cfg, template_kwargs):
230 template = pkg_resources.resource_string("yardstick.network_services.nfvi",
231 nfvi_cfg).decode('utf-8')
232 cfg_content = jinja2.Template(template, trim_blocks=True, lstrip_blocks=True,
233 finalize=finalize_for_yaml).render(
235 # cfg_content = io.StringIO(template.format(**template_kwargs))
236 cfg_file = os.path.join(config_file_path, nfvi_cfg)
237 # must write as root, so use sudo
238 self.connection.execute("cat | sudo tee {}".format(cfg_file), stdin=cfg_content)
# Assembles the template arguments for collectd.conf and writes the rendered
# file twice: into config_file_path and into BAR_COLLECTD_CONF_PATH.
# "loadplugins" is the set union of LIST_PLUGINS_ENABLED and the keys of
# self.plugins, so self.plugins must offer a keys() method.
# NOTE(review): the lines opening and closing the "kwargs" dict literal are
# not visible in this extract.
240 def _prepare_collectd_conf(self, config_file_path):
241 """ Prepare collectd conf """
244 "interval": self.interval,
245 "loadplugins": set(chain(LIST_PLUGINS_ENABLED, self.plugins.keys())),
246 # Optional fields PortName is descriptive only, use whatever is present
247 "port_names": self.port_names,
248 # "ovs_bridge_interfaces": ["br-int"],
249 "plugins": self.plugins,
251 self._provide_config_file(config_file_path, self.COLLECTD_CONF, kwargs)
252 self._provide_config_file(self.BAR_COLLECTD_CONF_PATH,
253 self.COLLECTD_CONF, kwargs)
# Determines the Open vSwitch DB socket path - the "ovs_socket_path" entry of
# the ovs_stats plugin options, else OVS_SOCKET_PATH - and runs "test -S" on
# it on the remote host, keeping the first element of execute()'s result.
# NOTE(review): the try/except lines around the lookup and the condition that
# guards the error log are not visible in this extract; presumably the error
# is logged when the test exits non-zero.
255 def _setup_ovs_stats(self, connection):
257 socket_path = self.plugins["ovs_stats"].get("ovs_socket_path", self.OVS_SOCKET_PATH)
259 # ovs_stats is not a dict
260 socket_path = self.OVS_SOCKET_PATH
261 status = connection.execute("test -S {}".format(socket_path))[0]
263 LOG.error("cannot find OVS socket %s", socket_path)
# Command sequence that restarts RabbitMQ, resets the node and recreates the
# admin/admin user with full permissions on vhost "/". The first command only
# creates the collectd.conf.d directory.
# NOTE(review): "rabbitmqctl reset" is among the commands, so this path is
# destructive for any existing broker state on the host.
# NOTE(review): the closing bracket of cmd_list, the loop over it and the
# condition guarding the raise are not visible in this extract; presumably
# ResourceCommandError is raised for the first command with a non-zero status.
265 def _reset_rabbitmq(self, connection):
267 LOG.debug("reset and setup amqp to collect data from collectd")
268 # ensure collectd.conf.d exists to avoid error/warning
269 cmd_list = ["sudo mkdir -p /etc/collectd/collectd.conf.d",
270 "sudo service rabbitmq-server restart",
271 "sudo rabbitmqctl stop_app",
272 "sudo rabbitmqctl reset",
273 "sudo rabbitmqctl start_app",
274 "sudo rabbitmqctl add_user admin admin",
275 "sudo rabbitmqctl authenticate_user admin admin",
276 "sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'"
280 exit_status, _, stderr = connection.execute(cmd)
282 raise ResourceCommandError(command=cmd, stderr=stderr)
# Runs "sudo rabbitmqctl list_users" and scans its output for "user".
# The first output line is skipped and, on every following line, the text
# before the first tab is compared with "user".
# NOTE(review): the use of exit_status and all return statements are not
# visible in this extract; callers treat the result as a boolean - confirm
# the exact return values against the complete file.
284 def _check_rabbitmq_user(self, connection, user='admin'):
285 exit_status, stdout, _ = connection.execute("sudo rabbitmqctl list_users")
287 for line in stdout.split('\n')[1:]:
288 if line.split('\t')[0] == user:
# Creates the admin/admin RabbitMQ user, verifies its credentials and grants
# it full permissions on vhost "/" - the same three commands that end the
# list in _reset_rabbitmq(), without the broker reset.
# NOTE(review): the closing bracket of cmd_list, the loop over it and the
# condition guarding the raise are not visible in this extract.
291 def _set_rabbitmq_admin_user(self, connection):
292 LOG.debug("add admin user to amqp")
293 cmd_list = ["sudo rabbitmqctl add_user admin admin",
294 "sudo rabbitmqctl authenticate_user admin admin",
295 "sudo rabbitmqctl set_permissions -p / admin '.*' '.*' '.*'"
299 exit_status, stdout, stderr = connection.execute(cmd)
301 raise ResourceCommandError(command=cmd, stdout=stdout, stderr=stderr)
# Prepares RabbitMQ for collection and verifies that it is up.
# When self._reset_mq_flag is set the broker is reset via _reset_rabbitmq();
# the admin user is created via _set_rabbitmq_admin_user() when
# _check_rabbitmq_user() reports it missing. Finally "sudo rabbitmqctl status"
# must mention "RabbitMQ" in its stdout, otherwise ResourceCommandError is
# raised.
# NOTE(review): a line is missing between the two "if" statements in this
# extract, so it cannot be told from here whether the user check runs always
# or only when no reset was done (an else branch) - confirm before editing.
303 def _start_rabbitmq(self, connection):
304 if self._reset_mq_flag:
305 self._reset_rabbitmq(connection)
307 if not self._check_rabbitmq_user(connection):
308 self._set_rabbitmq_admin_user(connection)
310 # check stdout for "sudo rabbitmqctl status" command
311 cmd = "sudo rabbitmqctl status"
312 _, stdout, stderr = connection.execute(cmd)
313 if not re.search("RabbitMQ", stdout):
314 LOG.error("rabbitmqctl status don't have RabbitMQ in running apps")
315 raise ResourceCommandError(command=cmd, stderr=stderr)
# Writes the collectd configuration, kills any running collectd, checks that
# the collectd binary exists and starts it with a raised open-file limit.
#   bin_path - installation root; the binary is <bin_path>/collectd/sbin/collectd
#              and the config directory is <bin_path>/collectd/etc
# Sequence: render config -> "pkill -x -9 collectd" -> "which <binary>" ->
# optional OVS socket check when the ovs_stats plugin is configured ->
# start collectd through "sudo bash -c 'ulimit -n 1000000 ; ...'" with
# self.timeout as the command timeout.
# NOTE(review): the conditions guarding both ResourceCommandError raises are
# not visible in this extract; presumably each fires on a non-zero exit
# status. The "Starting collectd..." debug message is logged twice.
317 def _start_collectd(self, connection, bin_path):
318 LOG.debug("Starting collectd to collect NFVi stats")
319 collectd_path = os.path.join(bin_path, "collectd", "sbin", "collectd")
320 config_file_path = os.path.join(bin_path, "collectd", "etc")
321 self._prepare_collectd_conf(config_file_path)
323 connection.execute('sudo pkill -x -9 collectd')
324 cmd = "which %s > /dev/null 2>&1" % collectd_path
325 exit_status, _, stderr = connection.execute(cmd)
327 raise ResourceCommandError(command=cmd, stderr=stderr)
329 if "ovs_stats" in self.plugins:
330 self._setup_ovs_stats(connection)
332 LOG.debug("Starting collectd to collect NFVi stats")
333 LOG.debug("Start collectd service..... %s second timeout", self.timeout)
334 # intel_pmu plug requires large numbers of files open, so try to set
335 # ulimit -n to a large value
337 cmd = "sudo bash -c 'ulimit -n 1000000 ; %s'" % collectd_path
338 exit_status, _, stderr = connection.execute(cmd, timeout=self.timeout)
340 raise ResourceCommandError(command=cmd, stderr=stderr)
# Entry point that prepares the host: starts RabbitMQ first, then collectd,
# both over self.connection. A ResourceCommandError from either step is
# logged with its traceback.
#   bin_path - installation root forwarded to _start_collectd()
# NOTE(review): the try line and anything following the log call in the
# handler are not visible in this extract, so it cannot be told from here
# whether the error is re-raised or suppressed.
344 def initiate_systemagent(self, bin_path):
345 """ Start system agent for NFVi collection on host """
348 self._start_rabbitmq(self.connection)
349 self._start_collectd(self.connection, bin_path)
350 except ResourceCommandError as e:
351 LOG.exception("Exception during collectd and rabbitmq start: %s", str(e))
# NOTE(review): the "def" line of this method is not visible in this extract;
# judging by the docstring it is the public start entry point. Only the debug
# log is visible - any call that launches the AMQP consumer is missing here.
355 """ start nfvi collection """
357 LOG.debug("Start NVFi metric collection...")
# NOTE(review): the "def" line of this method is not visible in this extract;
# judging by the docstring it is the public stop entry point.
# Visible behaviour: the consumer process, if any, is joined for up to 3
# seconds and then terminated; the agent is looked up with
# check_if_system_agent_running() and killed by pid and by name with SIGKILL;
# the RabbitMQ service is stopped when self._reset_mq_flag is set.
# NOTE(review): the definition of "agent", the conditions around the kill
# commands and the nesting of the final stop_app call are not visible
# (indentation is lost). stop_app is issued after the service stop -
# confirm that this order is intended.
360 """ stop nfvi collection """
365 LOG.debug("Stop resource monitor...")
367 if self.amqp_client is not None:
368 # we proper and try to join first
369 self.amqp_client.join(3)
370 self.amqp_client.terminate()
372 LOG.debug("Check if %s is running", agent)
373 status, pid = self.check_if_system_agent_running(agent)
374 LOG.debug("status %s pid %s", status, pid)
379 self.connection.execute('sudo kill -9 "%s"' % pid)
380 self.connection.execute('sudo pkill -9 "%s"' % agent)
382 if self._reset_mq_flag:
383 self.connection.execute('sudo service rabbitmq-server stop')
384 self.connection.execute("sudo rabbitmqctl stop_app")