1 # -*- coding: utf-8 -*-
3 # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 # not use this file except in compliance with the License. You may obtain
5 # a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 # License for the specific language governing permissions and limitations
15 """Classes used by collectd.py"""
22 from opnfv.deployment import factory
24 from functest.utils import constants
# Private key file used for SSH access; ConfigServer checks that it exists
# and loads it with paramiko when no password/key is supplied.
ID_RSA_PATH = '/root/.ssh/id_rsa'
SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
# Value handed back by get_plugin_interval() as its fallback.
DEF_PLUGIN_INTERVAL = 10
COLLECTD_CONF = '/etc/collectd.conf'
COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
NOTIFICATION_FILE = '/var/log/python-notifications.dump'
COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
# INSTALLER_IP may be unset.  os.getenv() would then return None and the
# chained .rstrip() raised AttributeError while the module was being
# imported; default to an empty string so the import always succeeds.
APEX_IP = os.getenv("INSTALLER_IP", "").rstrip('\n')
APEX_USER_STACK = 'stack'
APEX_PKEY = '/root/.ssh/id_rsa'
40 """Node configuration class"""
# Builds a node record from the positional sequence `attrs`; the indices
# read below are 0, 2, 3, 4, 5 and 6.  ConfigServer.__init__ creates Node
# objects from rows of the 'nova list' table split on '|'.
41 def __init__(self, attrs):
42 self.__null = attrs[0]
44 self.__name = attrs[2]
# An empty/falsy status field is normalised to None.
45 self.__status = attrs[3] if attrs[3] else None
46 self.__taskState = attrs[4]
47 self.__pwrState = attrs[5]
# Drop a leading '<lowercase letters>=' prefix (regex '^[a-z]+=') so that
# only the part after the '=' is stored.
48 self.__ip = re.sub('^[a-z]+=', '', attrs[6])
59 """Get node IP address"""
# Ask the opnfv deployment factory for an 'apex' handler and fetch the node
# list from it.
# NOTE(review): presumably the body of get_apex_nodes(), which the methods
# of ConfigServer call -- confirm against the full file.
68 handler = factory.Factory.get_handler('apex',
72 nodes = handler.get_nodes()
76 class ConfigServer(object):
77 """Class to get env configuration"""
# Set up the configuration server: remember the logger and optional
# private key, verify that the key file ID_RSA_PATH exists (IOError
# otherwise), open an SSH/SFTP session and run "source stackrc; nova list"
# to discover the nodes, which are parsed into Node objects.
78 def __init__(self, host, user, logger, priv_key=None):
82 self.__priv_key = priv_key
84 self.__logger = logger
86 self.__private_key_file = ID_RSA_PATH
87 if not os.path.isfile(self.__private_key_file):
89 "Private key file '{}'".format(self.__private_key_file)
91 raise IOError("Private key file '{}' not found.".format(
92 self.__private_key_file))
94 # get list of available nodes
95 ssh, sftp = self.__open_sftp_session(
96 self.__host, self.__user, self.__passwd)
98 fuel_node_passed = False
# Retry loop: bounded by `attempt <= 10`, left early once the command
# passed.  NOTE(review): the flag is called fuel_node_passed although all
# messages talk about the 'Apex node' command.
100 while (attempt <= 10) and not fuel_node_passed:
101 stdin, stdout, stderr = ssh.exec_command(
102 "source stackrc; nova list")
103 stderr_lines = stderr.readlines()
105 self.__logger.warning(
106 "'Apex node' command failed (try {}):".format(attempt))
107 for line in stderr_lines:
108 self.__logger.debug(line.strip())
110 fuel_node_passed = True
113 "'Apex node' command passed (try {})".format(attempt))
115 if not fuel_node_passed:
117 "'Apex node' command failed. This was the last try.")
119 "'Apex node' command failed. This was the last try.")
120 node_table = stdout.readlines()\
122 # skip table title and parse table values
# node_table[3:] skips the first three output lines; rows starting with
# '+' or a newline are singled out by the test below, every other row is
# split on '|' with each cell stripped of spaces/newlines.
124 for entry in node_table[3:]:
125 if entry[0] == '+' or entry[0] == '\n':
130 Node([str(x.strip(' \n')) for x in entry.split('|')]))
def get_controllers(self):
    """Return the discovered nodes whose name contains 'controller'.

    The result is a new list; it is empty when nothing matches or when
    no nodes are known at all.
    """
    # A leftover debug ``print`` of the first node's name-mangled private
    # IP used to sit here.  It was Python-2-only syntax and raised
    # IndexError whenever the node list was empty, so it has been removed.
    return [
        node for node in self.__nodes if 'controller' in node.get_name()]
def get_computes(self):
    """Collect every discovered node whose name contains 'compute'."""
    computes = []
    for candidate in self.__nodes:
        if 'compute' in candidate.get_name():
            computes.append(candidate)
    return computes
# Open a paramiko SSH connection to `host` as `user` and an SFTP channel on
# top of it.  When neither a password nor a previously stored private key
# is available, the key is read from self.__private_key_file and cached on
# the instance for later calls.
147 def __open_sftp_session(self, host, user, passwd=None):
148 # Connect to given host.
149 """Keyword arguments:
150 host -- host to connect
152 passwd -- password to use
154 Return tuple of SSH and SFTP client instances.
157 ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys automatically, i.e.
# the remote host identity is not verified.
158 ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
160 # try a direct access using password or private key
161 if not passwd and not self.__priv_key:
163 self.__priv_key = paramiko.RSAKey.from_private_key_file(
164 self.__private_key_file)
166 # connect to the server
168 host, username=user, password=passwd, pkey=self.__priv_key)
169 sftp = ssh.open_sftp()
171 # return SFTP client instance
# Reads /etc/collectd/collectd.conf.d/<plugin>.conf on the Apex node whose
# name equals compute.get_name() and scans it for a line containing
# 'Interval'; DEF_PLUGIN_INTERVAL is the fallback return value.
174 def get_plugin_interval(self, compute, plugin):
175 """Find the plugin interval in collectd configuration.
178 compute -- compute node instance
179 plugin -- plug-in name
181 If found, return interval value, otherwise the default value"""
182 default_interval = DEF_PLUGIN_INTERVAL
183 compute_name = compute.get_name()
184 nodes = get_apex_nodes()
186 if compute_name == node.get_dict()['name']:
187 stdout = node.run_cmd(
188 'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
190 return default_interval
# Substring test: any line that merely contains 'Interval' qualifies.
191 for line in stdout.split('\n'):
192 if 'Interval' in line:
194 return default_interval
# Reads the plugin's .conf file on the node matching compute.get_name() and
# returns the text after the first space of the first line that contains
# 'Interfaces', 'Bridges' or 'Cores'; otherwise default_values is returned.
# NOTE(review): the docstring promises a list, but the visible branches
# return a string, and `parameter` is not referenced in them -- confirm.
196 def get_plugin_config_values(self, compute, plugin, parameter):
197 """Get parameter values from collectd config file.
200 compute -- compute node instance
201 plugin -- plug-in name
202 parameter -- plug-in parameter
204 Return list of found values."""
206 compute_name = compute.get_name()
207 nodes = get_apex_nodes()
209 if compute_name == node.get_dict()['name']:
210 stdout = node.run_cmd(
211 'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
213 return default_values
# split(' ', 1)[1] raises IndexError for a matching line without a space.
214 for line in stdout.split('\n'):
215 if 'Interfaces' in line:
216 return line.split(' ', 1)[1]
217 elif 'Bridges' in line:
218 return line.split(' ', 1)[1]
219 elif 'Cores' in line:
220 return line.split(' ', 1)[1]
223 return default_values
# Runs `command` through an SSH session and returns stdout as the list
# produced by readlines().  Raises ValueError when both host_ip and ssh
# are None.
225 def execute_command(self, command, host_ip=None, ssh=None):
226 """Execute command on node and return list of lines of standard output.
230 host_ip -- IP of the node
231 ssh -- existing open SSH session to use
233 One of host_ip or ssh must not be None. If both are not None,
234 existing ssh session is used.
236 if host_ip is None and ssh is None:
237 raise ValueError('One of host_ip or ssh must not be None.')
# NOTE(review): user name and password for the new session are hard-coded
# ('root' / 'opnfvapex'); stderr of the command is never inspected.
239 ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
240 stdin, stdout, stderr = ssh.exec_command(command)
241 return stdout.readlines()
# Runs 'sudo ovs-vsctl list-br' on the Apex node whose name equals
# compute.get_name().
# NOTE(review): the command is 'list-br' (bridges) while the docstring
# speaks of interfaces -- confirm which one callers expect.
243 def get_ovs_interfaces(self, compute):
244 """Get list of configured OVS interfaces
247 compute -- compute node instance
249 compute_name = compute.get_name()
250 nodes = get_apex_nodes()
252 if compute_name == node.get_dict()['name']:
253 stdout = node.run_cmd('sudo ovs-vsctl list-br')
# Copies constants.ENV_FILE to the matching controller as 'overcloudrc.v3',
# sources it there and greps the output of 'openstack catalog list' for
# 'gnocchi'.  "Running" therefore means "listed in the service catalog".
256 def is_gnocchi_running(self, controller):
257 """Check whether Gnocchi is running on controller.
260 controller -- controller node instance
262 Return boolean value whether Gnocchi is running.
264 gnocchi_present = False
265 controller_name = controller.get_name()
266 nodes = get_apex_nodes()
268 if controller_name == node.get_dict()['name']:
269 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
270 stdout = node.run_cmd(
271 "source overcloudrc.v3;"
272 + "openstack catalog list | grep gnocchi")
275 elif 'gnocchi' in stdout:
276 gnocchi_present = True
277 return gnocchi_present
280 return gnocchi_present
# Same probe as is_gnocchi_running, but for 'aodh': the environment file is
# uploaded as 'overcloudrc.v3', sourced, and 'openstack catalog list' is
# grepped for 'aodh' on the controller whose name matches.
282 def is_aodh_running(self, controller):
283 """Check whether aodh service is running on controller
286 controller_name = controller.get_name()
287 nodes = get_apex_nodes()
289 if controller_name == node.get_dict()['name']:
290 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
291 stdout = node.run_cmd(
292 "source overcloudrc.v3;"
293 + "openstack catalog list | grep aodh")
296 elif 'aodh' in stdout:
# Queries the RPM database on the matching compute node with
# 'rpm -qa | grep mcelog' and looks for 'mcelog' in the output.
# NOTE(review): the `package` argument is documented but the visible
# command hard-codes 'mcelog' -- confirm whether it is used at all.
303 def is_mcelog_installed(self, compute, package):
304 """Check whether package exists on compute node.
307 compute -- compute node instance
308 package -- Linux package to search for
310 Return boolean value whether package is installed.
312 compute_name = compute.get_name()
313 nodes = get_apex_nodes()
315 if compute_name == node.get_dict()['name']:
316 stdout = node.run_cmd(
317 'rpm -qa | grep mcelog')
320 elif 'mcelog' in stdout:
# Greps /proc/cpuinfo on the matching compute node for 'hypervisor'.
# NOTE(review): the docstring describes virtual-machine detection while the
# method name asks about RDT availability; confirm which result maps to
# True.
325 def is_rdt_available(self, compute):
326 """Check whether the compute node is a virtual machine."""
327 compute_name = compute.get_name()
328 nodes = get_apex_nodes()
330 if compute_name == node.get_dict()['name']:
331 stdout = node.run_cmd('cat /proc/cpuinfo | grep hypervisor')
332 if 'hypervisor' in stdout:
# Lists /usr/local/lib/ on the matching compute node, filtered by
# 'grep libpqos', and tests whether 'libpqos' appears in the output.  Only
# that single directory is inspected.
336 def is_libpqos_on_node(self, compute):
337 """Check whether libpqos is present on compute node"""
339 compute_name = compute.get_name()
340 nodes = get_apex_nodes()
342 if compute_name == node.get_dict()['name']:
343 stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
344 if 'libpqos' in stdout:
# Presence is decided by whether the string 'aodh.conf' occurs in the `ls`
# output of /etc/collectd/collectd.conf.d on the matching compute node.
# NOTE(review): the docstring mentions trying to enable the plugin, but no
# enabling step is visible in this method -- confirm.
348 def check_aodh_plugin_included(self, compute):
349 """Check if aodh plugin is included in collectd.conf file.
350 If not, try to enable it.
353 compute -- compute node instance
355 Return boolean value whether AODH plugin is included
356 or it's enabling was successful.
358 compute_name = compute.get_name()
359 nodes = get_apex_nodes()
361 if compute_name == node.get_dict()['name']:
362 aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
363 if 'aodh.conf' not in aodh_conf:
365 "AODH Plugin not included in {}".format(compute_name))
369 "AODH plugin present in compute node {}" .format(
# Presence is decided by whether 'collectd-ceilometer-plugin.conf' occurs
# in the `ls` output of /etc/collectd/collectd.conf.d on the matching
# compute node.
# NOTE(review): the docstring mentions trying to enable the plugin, but no
# enabling step is visible in this method -- confirm.
374 def check_gnocchi_plugin_included(self, compute):
375 """Check if gnocchi plugin is included in collectd.conf file.
376 If not, try to enable it.
379 compute -- compute node instance
381 Return boolean value whether gnocchi plugin is included
382 or it's enabling was successful.
384 compute_name = compute.get_name()
385 nodes = get_apex_nodes()
387 if compute_name == node.get_dict()['name']:
388 gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
389 if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
391 "Gnocchi Plugin not included in node {}".format(
396 "Gnocchi plugin available in compute node {}" .format(
# Runs an SNMP v2c walk (community 'public', target localhost) on the
# matching compute node using the Intel RDT MIB file and OID prefix defined
# below, and logs the raw output at info level.
401 def check_snmp_plugin_included(self, compute):
402 """Check if SNMP plugin is active in compute node.
404 snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
405 snmp_string = 'INTEL-RDT-MIB::intelRdt'
406 compute_name = compute.get_name()
407 nodes = get_apex_nodes()
409 if compute_name == node.get_dict()['name']:
410 stdout = node.run_cmd(
411 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
412 snmp_mib, snmp_string))
413 self.__logger.info("snmp output = {}" .format(stdout))
420 self, compute, plugins, error_plugins, create_backup=True):
421 """Enable plugins on compute node
424 compute -- compute node instance
425 plugins -- list of plugins to be enabled
427 Return boolean value indicating whether function was successful.
# csv.conf is expected next to this source file; it is uploaded to the
# matching compute node under the name 'csv.conf'.  The path
# /etc/collectd/collectd.conf.d/csv.conf appears in a later command.
# NOTE(review): error_plugins and create_backup are not referenced in the
# lines shown here -- confirm whether they are used.
429 csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
430 plugins = sorted(plugins)
431 compute_name = compute.get_name()
432 nodes = get_apex_nodes()
434 if compute_name == node.get_dict()['name']:
435 node.put_file(csv_file, 'csv.conf')
438 + '/etc/collectd/collectd.conf.d/csv.conf')
# Stops collectd with systemctl on the matching compute node, checks via
# 'pgrep collectd' that it is gone, starts it again and gathers the
# entries of the start output that contain 'WARN: '.
441 def restart_collectd(self, compute):
442 """Restart collectd on compute node.
445 compute -- compute node instance
447 Retrun tuple with boolean indicating success and list of warnings
448 received during collectd start.
450 compute_name = compute.get_name()
451 nodes = get_apex_nodes()
# NOTE(review): the helper's docstring documents `ssh_session`, but its
# parameter is named compute_node.
453 def get_collectd_processes(compute_node):
454 """Get number of running collectd processes.
457 ssh_session -- instance of SSH session in which to check
460 stdout = compute_node.run_cmd("pgrep collectd")
464 if compute_name == node.get_dict()['name']:
465 # node.run_cmd('su; "opnfvapex"')
466 self.__logger.info('Stopping collectd service...')
467 node.run_cmd('sudo systemctl stop collectd')
# A truthy process count after the stop means collectd did not terminate.
469 if get_collectd_processes(node):
470 self.__logger.error('Collectd is still running...')
472 self.__logger.info('Starting collectd service...')
# NOTE(review): other methods treat run_cmd() output as a single string
# (stdout.split('\n')); if that holds here too, iterating `stdout` below
# walks characters and 'WARN: ' can never match -- verify.
473 stdout = node.run_cmd('sudo systemctl start collectd')
476 output.strip() for output in stdout if 'WARN: ' in output]
# Zero processes after the start means collectd failed to come up.
477 if get_collectd_processes(node) == 0:
478 self.__logger.error('Collectd is still not running...')
479 return False, warning
def trigger_alarm_update(self, alarm, compute_node):
    """Run the shell commands registered for ``alarm`` on ``compute_node``.

    Keyword arguments:
    alarm -- alarm identifier; 'mcelog' and 'ovs_events' are recognised,
             any other value results in no command being run
    compute_node -- node object exposing run_cmd(command)

    Nothing is returned; the output of the commands is discarded.
    """
    # TODO: move these actions to main, with criteria lists so that we
    # can reference that, i.e.
    # test_plugin_with_aodh(self, compute, plugin.., logger,
    #                       criteria_list, alarm_action)
    stimuli = (
        ('mcelog', (
            'sudo modprobe mce-inject',
            'sudo ./mce-inject_ea < corrected',
        )),
        ('ovs_events', (
            'sudo ifconfig -a | grep br0',
            'sudo ifconfig br0 down; sudo ifconfig br0 up',
        )),
    )
    # Plain equality checks (not a dict lookup) so that unhashable
    # ``alarm`` values are simply ignored, as before.
    for alarm_name, shell_commands in stimuli:
        if alarm == alarm_name:
            for shell_command in shell_commands:
                compute_node.run_cmd(shell_command)
# On each controller node: upload the environment file, trigger the alarm
# stimulus on the compute node, look up matching alarms with
# 'aodh alarm list', read the 'state_timestamp' of an alarm, trigger the
# stimulus again, read the timestamp a second time and compare the two.
# Equal timestamps are reported as "Data not updated ...".
# NOTE(review): a `logger` parameter is accepted, yet all visible logging
# goes through self.__logger.
492 def test_plugins_with_aodh(
493 self, compute, plugin_interval, logger,
499 nodes = get_apex_nodes()
# `compute` is compared with node names here, so it is a name string;
# the [0] raises IndexError when no node carries that name.
500 compute_node = [node for node in nodes if node.get_dict()['name'] == compute][0]
502 if node.is_controller():
503 self.__logger.info('Getting AODH Alarm list on {}' .format(
504 (node.get_dict()['name'])))
505 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
# criteria_list is handed over as the `alarm` argument.
506 self.trigger_alarm_update(criteria_list, compute_node)
507 stdout = node.run_cmd(
508 "source overcloudrc.v3;"
509 + "aodh alarm list | grep {0} | grep {1}"
510 .format(criteria_list, compute))
512 self.__logger.info("aodh alarm list was empty")
514 for line in stdout.splitlines():
515 line = line.replace('|', "")
516 metric_id = line.split()[0]
517 stdout = node.run_cmd(
518 'source overcloudrc.v3; aodh alarm show {}' .format(
521 self.__logger.info("aodh alarm list was empty")
523 for line in stdout.splitlines()[3: -1]:
524 line = line.replace('|', "")
525 if line.split()[0] == 'state_timestamp':
526 timestamps1 = line.split()[1]
527 self.trigger_alarm_update(criteria_list, compute_node)
529 stdout = node.run_cmd(
530 "source overcloudrc.v3; aodh alarm show {}" .format(
533 self.__logger.info("aodh alarm list was empty")
535 for line in stdout.splitlines()[3:-1]:
536 line = line.replace('|', "")
537 if line.split()[0] == 'state_timestamp':
538 timestamps2 = line.split()[1]
539 if timestamps1 == timestamps2:
541 "Data not updated after interval of 12 seconds")
544 self.__logger.info("PASS")
# On each controller node: upload the environment file, find matching
# metrics with 'gnocchi metric list', read a timestamp from
# 'gnocchi measures show', sleep for sleep_time seconds, read it again and
# compare.  Equal timestamps are reported as data not being updated.
# NOTE(review): a `logger` parameter is accepted, yet all visible logging
# goes through self.__logger.
547 def test_plugins_with_gnocchi(
548 self, compute, plugin_interval, logger,
554 nodes = get_apex_nodes()
# Intervals above 15 wait for twice the plugin interval.
555 if plugin_interval > 15:
556 sleep_time = plugin_interval*2
561 if node.is_controller():
562 self.__logger.info('Getting gnocchi metric list on {}' .format(
563 (node.get_dict()['name'])))
564 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
565 stdout = node.run_cmd(
566 "source overcloudrc.v3;"
567 + "gnocchi metric list | grep {0} | grep {1}"
568 .format(criteria_list, compute))
570 self.__logger.info("gnocchi list was empty")
572 for line in stdout.splitlines():
573 line = line.replace('|', "")
574 metric_id = line.split()[0]
575 stdout = node.run_cmd(
576 'source overcloudrc.v3;gnocchi measures show {}'.format(
579 self.__logger.info("gnocchi list was empty")
581 for line in stdout.splitlines()[3: -1]:
585 timestamps1 = line.replace('|', "")
586 timestamps1 = timestamps1.split()[0]
587 time.sleep(sleep_time)
588 stdout = node.run_cmd(
589 "source overcloudrc.v3;gnocchi measures show {}".format(
592 self.__logger.info("gnocchi measures was empty")
594 for line in stdout.splitlines()[3:-1]:
598 timestamps2 = line.replace('|', "")
599 timestamps2 = timestamps2.split()[0]
600 if timestamps1 == timestamps2:
602 "Plugin Interval is {}" .format(plugin_interval))
604 "Data not updated after {} seconds".format(
608 self.__logger.info("PASS")
# For the plugins 'hugepages', 'intel_rdt' and 'mcelog': run snmpwalk on
# the node whose name equals `compute`, take the fourth whitespace-separated
# token of the output as a counter, repeat the query and compare the two
# counters.  Empty output or output containing 'OID' is logged as a
# failure.
# NOTE(review): the three snmp_* parameters default to mutable lists ([]);
# they are only read in the visible code, but None defaults would be safer.
612 def test_plugins_with_snmp(
613 self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
614 snmp_mib_strings=[], snmp_in_commands=[]):
616 if plugin in ('hugepages', 'intel_rdt', 'mcelog'):
617 nodes = get_apex_nodes()
619 if compute == node.get_dict()['name']:
620 stdout = node.run_cmd(
621 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
622 snmp_mib_files, snmp_mib_strings))
623 self.__logger.info("{}" .format(stdout))
625 self.__logger.info("No output from snmpwalk")
627 elif 'OID' in stdout:
628 self.__logger.info("SNMP query failed")
# split()[3] raises IndexError when the output has fewer than four tokens.
631 counter1 = stdout.split()[3]
633 stdout = node.run_cmd(
634 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
635 snmp_mib_files, snmp_mib_strings))
636 self.__logger.info("{}" .format(stdout))
638 self.__logger.info("No output from snmpwalk")
639 elif 'OID' in stdout:
641 "SNMP query failed during second check")
642 self.__logger.info("waiting for 10 sec")
644 stdout = node.run_cmd(
645 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
646 snmp_mib_files, snmp_mib_strings))
647 self.__logger.info("{}" .format(stdout))
649 self.__logger.info("No output from snmpwalk")
650 elif 'OID' in stdout:
651 self.__logger.info("SNMP query failed again")
652 self.__logger.info("Failing this test case")
655 counter2 = stdout.split()[3]
657 if counter1 == counter2: