1 # -*- coding: utf-8 -*-
3 # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 # not use this file except in compliance with the License. You may obtain
5 # a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 # License for the specific language governing permissions and limitations
15 """Classes used by collectd.py"""
22 from opnfv.deployment import factory
24 from functest.utils import constants
26 ID_RSA_PATH = '/root/.ssh/id_rsa'
27 SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
28 DEF_PLUGIN_INTERVAL = 10
29 COLLECTD_CONF = '/etc/collectd.conf'
30 COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
31 NOTIFICATION_FILE = '/var/log/python-notifications.dump'
32 COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
33 APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
35 APEX_USER_STACK = 'stack'
36 APEX_PKEY = '/root/.ssh/id_rsa'
40 """Node configuration class"""
    def __init__(self, attrs):
        """Initialize node data from one parsed row of `nova list` output.

        attrs -- list of the row's cell values, already split on '|'

        NOTE(review): this listing skips the original line assigning
        attrs[1] -- confirm against the full file.
        """
        # First cell is the empty string before the row's leading '|'.
        self.__null = attrs[0]
        self.__name = attrs[2]
        # Empty status cell is normalised to None.
        self.__status = attrs[3] if attrs[3] else None
        self.__taskState = attrs[4]
        self.__pwrState = attrs[5]
        # Address cell looks like "<network>=<ip>"; keep only the IP part.
        self.__ip = re.sub('^[a-z]+=', '', attrs[6])
59 """Get node IP address"""
68 handler = factory.Factory.get_handler('apex',
72 nodes = handler.get_nodes()
76 class ConfigServer(object):
77 """Class to get env configuration"""
    def __init__(self, host, user, logger, priv_key=None):
        """Open a session to the undercloud host and read the node table.

        host -- undercloud (Apex) host to connect to
        user -- SSH user name
        logger -- logger instance used for all diagnostics
        priv_key -- optional private key for SSH authentication

        Raises IOError when the expected private key file is absent.

        NOTE(review): several lines are missing from this listing (the
        assignments feeding self.__host/self.__user/self.__passwd, the
        initialisation of `attempt`, and a few logger-call openers) --
        confirm against the full file.
        """
        self.__priv_key = priv_key
        self.__logger = logger
        # Key used when no password/private key is supplied explicitly.
        self.__private_key_file = ID_RSA_PATH
        if not os.path.isfile(self.__private_key_file):
            # NOTE(review): bare string below was presumably an argument of
            # a truncated logger.error(...) call.
            "Private key file '{}'".format(self.__private_key_file)
            raise IOError("Private key file '{}' not found.".format(
                self.__private_key_file))

        # get list of available nodes
        ssh, sftp = self.__open_sftp_session(
            self.__host, self.__user, self.__passwd)
        fuel_node_passed = False
        # Retry `nova list` on the undercloud until it succeeds or the
        # attempt budget (10) is exhausted.
        while (attempt <= 10) and not fuel_node_passed:
            stdin, stdout, stderr = ssh.exec_command(
                "source stackrc; nova list")
            stderr_lines = stderr.readlines()
            self.__logger.warning(
                "'Apex node' command failed (try {}):".format(attempt))
            for line in stderr_lines:
                self.__logger.debug(line.strip())
            fuel_node_passed = True
            # NOTE(review): truncated logger-call argument below.
            "'Apex node' command passed (try {})".format(attempt))
        if not fuel_node_passed:
            # NOTE(review): truncated logger.error(...) argument and a
            # truncated raise below.
            "'Apex node' command failed. This was the last try.")
            "'Apex node' command failed. This was the last try.")
        node_table = stdout.readlines()\

        # skip table title and parse table values
        for entry in node_table[3:]:
            if entry[0] == '+' or entry[0] == '\n':
            # NOTE(review): the `self.__nodes.append(` opener appears to be
            # missing from this listing.
            Node([str(x.strip(' \n')) for x in entry.split('|')]))
132 def get_controllers(self):
133 # Get list of controllers
134 print self.__nodes[0]._Node__ip
136 [node for node in self.__nodes if 'controller' in node.get_name()])
138 def get_computes(self):
139 # Get list of computes
141 [node for node in self.__nodes if 'compute' in node.get_name()])
    def __open_sftp_session(self, host, user, passwd=None):
        # Connect to given host.
        """Open SSH and SFTP sessions to a host.

        Keyword arguments:
        host -- host to connect
        user -- user name to authenticate as
        passwd -- password to use

        Return tuple of SSH and SFTP client instances.
        """
        ssh = paramiko.SSHClient()
        # Accept unknown host keys automatically (test environment).
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # try a direct access using password or private key
        if not passwd and not self.__priv_key:
            # No credentials supplied -> fall back to the configured key file.
            self.__priv_key = paramiko.RSAKey.from_private_key_file(
                self.__private_key_file)

        # connect to the server
        # NOTE(review): the `ssh.connect(` opener is missing from this
        # listing; the argument line below belongs to it -- confirm.
            host, username=user, password=passwd, pkey=self.__priv_key)
        sftp = ssh.open_sftp()

        # return SFTP client instance
        # NOTE(review): the `return ssh, sftp` line is not visible in this
        # listing -- confirm against the full file.
    def get_plugin_interval(self, compute, plugin):
        """Find the plugin interval in collectd configuration.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name

        If found, return interval value, otherwise the default value"""
        default_interval = DEF_PLUGIN_INTERVAL
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): lines are missing from this listing here -- at least
        # a `for node in nodes:` loop header, the guard around the first
        # `return`, and the Interval-value extraction -- confirm.
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                    return default_interval
                for line in stdout.split('\n'):
                    if 'Interval' in line:
        # Fallback when no matching node or setting was found.
        return default_interval
    def get_plugin_config_values(self, compute, plugin, parameter):
        """Get parameter values from collectd config file.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name
        parameter -- plug-in parameter

        Return list of found values."""
        # NOTE(review): `default_values` is referenced below but its
        # initialisation (presumably an empty list) is missing from this
        # listing, as is the `for node in nodes:` loop header -- confirm.
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
                    return default_values
                for line in stdout.split('\n'):
                    # Return the raw value portion of the matching setting.
                    if 'Interfaces' in line:
                        return line.split(' ', 1)[1]
                    elif 'Bridges' in line:
                        return line.split(' ', 1)[1]
                    elif 'Cores' in line:
                        return line.split(' ', 1)[1]
        # Fallback when no matching node or parameter was found.
        return default_values
225 def execute_command(self, command, host_ip=None, ssh=None):
226 """Execute command on node and return list of lines of standard output.
230 host_ip -- IP of the node
231 ssh -- existing open SSH session to use
233 One of host_ip or ssh must not be None. If both are not None,
234 existing ssh session is used.
236 if host_ip is None and ssh is None:
237 raise ValueError('One of host_ip or ssh must not be None.')
239 ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
240 stdin, stdout, stderr = ssh.exec_command(command)
241 return stdout.readlines()
    def get_ovs_interfaces(self, compute):
        """Get list of configured OVS interfaces

        Keyword arguments:
        compute -- compute node instance
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header and the
        # processing/return of `stdout` are missing from this listing --
        # confirm against the full file.
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd('sudo ovs-vsctl list-br')
    def is_gnocchi_running(self, controller):
        """Check whether Gnocchi is running on controller.

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether Gnocchi is running.
        """
        gnocchi_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header and the `if`
        # branch paired with the `elif` below are missing from this
        # listing -- confirm.
            if controller_name == node.get_dict()['name']:
                # Ship the overcloud credentials to the node before querying.
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep gnocchi")
                elif 'gnocchi' in stdout:
                    gnocchi_present = True
                    return gnocchi_present
        return gnocchi_present
    def is_aodh_running(self, controller):
        """Check whether aodh service is running on controller"""
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): listing is truncated -- the loop header, the `if`
        # branch paired with the `elif` below, and the method's return
        # lines are not visible; confirm against the full file.
            if controller_name == node.get_dict()['name']:
                # Ship the overcloud credentials to the node before querying.
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep aodh")
                elif 'aodh' in stdout:
    def is_mcelog_installed(self, compute, package):
        """Check whether package exists on compute node.

        Keyword arguments:
        compute -- compute node instance
        package -- Linux package to search for

        Return boolean value whether package is installed.
        """
        # NOTE(review): `package` is not interpolated into the rpm query
        # below -- 'mcelog' is hardcoded; confirm intent.
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): loop header and the `if` branch paired with the
        # `elif` below are missing from this listing.
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'rpm -qa | grep mcelog')
                elif 'mcelog' in stdout:
    def is_rdt_available(self, compute):
        """Check whether the compute node is a virtual machine."""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header and the body of
        # the final `if` are missing from this listing -- confirm.
            if compute_name == node.get_dict()['name']:
                # Presence of the 'hypervisor' cpuinfo flag indicates a VM,
                # where Intel RDT is not available.
                stdout = node.run_cmd('cat /proc/cpuinfo | grep hypervisor')
                if 'hypervisor' in stdout:
    def is_libpqos_on_node(self, compute):
        """Check whether libpqos is present on compute node"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header and the body of
        # the final `if` are missing from this listing -- confirm.
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
                if 'libpqos' in stdout:
    def check_aodh_plugin_included(self, compute):
        """Check if aodh plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether AODH plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header is missing
        # from this listing -- confirm.
            if compute_name == node.get_dict()['name']:
                aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'aodh.conf' not in aodh_conf:
                    # NOTE(review): the logger-call openers for the two
                    # message arguments below are truncated in this listing.
                    "AODH Plugin not included in {}".format(compute_name))
                    "AODH plugin present in compute node {}" .format(
    def check_gnocchi_plugin_included(self, compute):
        """Check if gnocchi plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether gnocchi plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header is missing
        # from this listing -- confirm.
            if compute_name == node.get_dict()['name']:
                gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
                    # NOTE(review): the logger-call openers for the two
                    # message arguments below are truncated in this listing.
                    "Gnocchi Plugin not included in node {}".format(
                    "Gnocchi plugin available in compute node {}" .format(
    def check_snmp_plugin_included(self, compute):
        """Check if SNMP plugin is active in compute node.

        Queries the Intel RDT MIB over SNMP on the matching compute node
        and logs the walk output.
        """
        snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
        snmp_string = 'INTEL-RDT-MIB::intelRdt'
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        # NOTE(review): the `for node in nodes:` loop header and the
        # method's return lines are missing from this listing -- confirm.
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                        snmp_mib, snmp_string))
                self.__logger.info("snmp output = {}" .format(stdout))
420 self, compute, plugins, error_plugins, create_backup=True):
421 """Enable plugins on compute node
424 compute -- compute node instance
425 plugins -- list of plugins to be enabled
427 Return boolean value indicating whether function was successful.
429 csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
430 plugins = sorted(plugins)
431 compute_name = compute.get_name()
432 nodes = get_apex_nodes()
434 if compute_name == node.get_dict()['name']:
435 node.put_file(csv_file, 'csv.conf')
438 + '/etc/collectd/collectd.conf.d/csv.conf')
    def restart_collectd(self, compute):
        """Restart collectd on compute node.

        Keyword arguments:
        compute -- compute node instance

        Return tuple with boolean indicating success and list of warnings
        received during collectd start.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()

        def get_collectd_processes(compute_node):
            """Get number of running collectd processes.

            compute_node -- node instance on which to check
            """
            stdout = compute_node.run_cmd("pgrep collectd")
            # NOTE(review): the helper's return statement is missing from
            # this listing -- confirm against the full file.

        # NOTE(review): the `for node in nodes:` loop header, the `warning
        # = [` opener for the comprehension fragment below, and the success
        # return are missing from this listing -- confirm.
            if compute_name == node.get_dict()['name']:
                # node.run_cmd('su; "opnfvapex"')
                self.__logger.info('Stopping collectd service...')
                node.run_cmd('sudo systemctl stop collectd')
                if get_collectd_processes(node):
                    self.__logger.error('Collectd is still running...')
                self.__logger.info('Starting collectd service...')
                stdout = node.run_cmd('sudo systemctl start collectd')
                    output.strip() for output in stdout if 'WARN: ' in output]
                if get_collectd_processes(node) == 0:
                    self.__logger.error('Collectd is still not running...')
                    return False, warning
    # NOTE(review): this method's signature tail (criteria argument) and
    # several body lines (loop headers, `if not stdout:`-style guards, the
    # time.sleep between reads, and the format-call closers) are missing
    # from this listing; comments below flag the visible gaps.
    # Purpose (from visible code): read an AODH alarm's timestamp twice on
    # each controller and report whether it advanced.
    def test_plugins_with_aodh(
            self, compute, plugin_interval, logger,
        nodes = get_apex_nodes()
            if node.is_controller():
                self.__logger.info('Getting AODH Alarm list on {}' .format(
                    (node.get_dict()['name'])))
                # Ship overcloud credentials to the controller first.
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "aodh alarm list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                    self.__logger.info("aodh alarm list was empty")
                for line in stdout.splitlines():
                    # Strip table decoration; first column is the alarm id.
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3; aodh alarm show {}' .format(
                        self.__logger.info("aodh alarm list was empty")
                    # First timestamp read.
                    for line in stdout.splitlines()[3: -1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps1 = line.split()[1]
                    stdout = node.run_cmd(
                        "source overcloudrc.v3; aodh alarm show {}" .format(
                        self.__logger.info("aodh alarm list was empty")
                    # Second timestamp read after the wait interval.
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps2 = line.split()[1]
                    # Equal timestamps mean the alarm data was not refreshed.
                    if timestamps1 == timestamps2:
                        "Data not updated after interval of 12 seconds")
                    self.__logger.info("PASS")
    # NOTE(review): this method's signature tail and several body lines
    # (loop headers, the `else` of the sleep_time branch, `if not stdout:`
    # guards, and format-call closers) are missing from this listing;
    # comments below flag the visible gaps.
    # Purpose (from visible code): read a Gnocchi metric's latest measure
    # timestamp twice, separated by a sleep, and report whether it advanced.
    def test_plugins_with_gnocchi(
            self, compute, plugin_interval, logger,
        nodes = get_apex_nodes()
        # Wait roughly two plugin intervals for slow plugins before the
        # second read.
        if plugin_interval > 15:
            sleep_time = plugin_interval*2
            if node.is_controller():
                self.__logger.info('Getting gnocchi metric list on {}' .format(
                    (node.get_dict()['name'])))
                # Ship overcloud credentials to the controller first.
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "gnocchi metric list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                    self.__logger.info("gnocchi list was empty")
                for line in stdout.splitlines():
                    # Strip table decoration; first column is the metric id.
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3;gnocchi measures show {}'.format(
                        self.__logger.info("gnocchi list was empty")
                    # First timestamp read.
                    for line in stdout.splitlines()[3: -1]:
                        timestamps1 = line.replace('|', "")
                        timestamps1 = timestamps1.split()[0]
                    time.sleep(sleep_time)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3;gnocchi measures show {}".format(
                        self.__logger.info("gnocchi measures was empty")
                    # Second timestamp read after sleeping.
                    for line in stdout.splitlines()[3:-1]:
                        timestamps2 = line.replace('|', "")
                        timestamps2 = timestamps2.split()[0]
                    # Equal timestamps mean the measures were not refreshed.
                    if timestamps1 == timestamps2:
                        # NOTE(review): truncated logger-call arguments below.
                        "Plugin Interval is {}" .format(plugin_interval))
                        "Data not updated after {} seconds".format(
                    self.__logger.info("PASS")
    def test_plugins_with_snmp(
            self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
            snmp_mib_strings=[], snmp_in_commands=[]):
        """Check that an SNMP counter for the plugin advances over time.

        NOTE(review): mutable default arguments ([]) are shared between
        calls; prefer None sentinels (not changed in this comment-only
        pass). The body below this point is also truncated in this listing
        (loop header, `if not stdout:`-style guards, waits, and the tail of
        the method) -- confirm against the full file.
        """
        # NOTE(review): BUG -- this condition is always true: the bare
        # strings 'intel_rdt' / 'mcelog' are truthy, so the `or` chain never
        # checks them against `plugin`. It almost certainly should read
        # `if plugin in ('hugepages', 'intel_rdt', 'mcelog'):`.
        if plugin == 'hugepages' or 'intel_rdt' or 'mcelog':
            nodes = get_apex_nodes()
            # NOTE(review): `for node in nodes:` missing from this listing.
                if compute == node.get_dict()['name']:
                    # First snmpwalk read.
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        self.__logger.info("SNMP query failed")
                        counter1 = stdout.split()[3]
                    # Second read.
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        # NOTE(review): truncated logger-call argument below.
                        "SNMP query failed during second check")
                        self.__logger.info("waiting for 10 sec")
                    # Third read after the wait.
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        self.__logger.info("SNMP query failed again")
                        self.__logger.info("Failing this test case")
                    counter2 = stdout.split()[3]

                    # A static counter means the plugin produced no new data.
                    if counter1 == counter2: