1 # -*- coding: utf-8 -*-
3 # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 # not use this file except in compliance with the License. You may obtain
5 # a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 # License for the specific language governing permissions and limitations
15 """Classes used by collectd.py"""
22 from opnfv.deployment import factory
24 from functest.utils import constants
# Module-level constants: SSH key material, collectd configuration paths
# and Apex (TripleO undercloud) access parameters.
26 ID_RSA_PATH = '/root/.ssh/id_rsa'
27 SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
# Fallback plugin interval (seconds) used when no 'Interval' entry is
# found in a plugin's collectd conf file (see get_plugin_interval).
28 DEF_PLUGIN_INTERVAL = 10
29 COLLECTD_CONF = '/etc/collectd.conf'
30 COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
31 NOTIFICATION_FILE = '/var/log/python-notifications.dump'
32 COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
# NOTE(review): os.getenv returns None when INSTALLER_IP is unset, and
# .rstrip() would then raise AttributeError at import time -- confirm the
# environment variable is always exported before importing this module.
33 APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
35 APEX_USER_STACK = 'stack'
36 APEX_PKEY = '/root/.ssh/id_rsa'
40 """Node configuration class"""
41 def __init__(self, attrs):
# Store one parsed row of the installer's node table.
# attrs -- list of stripped cell values from one 'nova list' table row;
# presumably [<empty>, id, name, status, task state, power state,
# networks] -- TODO confirm against ConfigServer.__init__ (attrs[1] is
# consumed on a line not visible in this listing).
42 self.__null = attrs[0]
44 self.__name = attrs[2]
# Empty status cell is normalized to None.
45 self.__status = attrs[3] if attrs[3] else None
46 self.__taskState = attrs[4]
47 self.__pwrState = attrs[5]
# Strip the network-name prefix (e.g. 'ctlplane=') so only the bare IP
# address of the node is kept.
48 self.__ip = re.sub('^[a-z]+=', '', attrs[6])
59 """Get node IP address"""
68 handler = factory.Factory.get_handler('apex',
72 nodes = handler.get_nodes()
76 class ConfigServer(object):
77 """Class to get env configuration"""
78 def __init__(self, host, user, logger, priv_key=None):
# Cache connection parameters and discover the overcloud nodes.
# host -- undercloud host to connect to
# user -- SSH user name
# logger -- logger instance used for all subsequent reporting
# priv_key -- optional pre-loaded private key (default key file is used
#             when absent)
# Raises IOError when the default private key file does not exist.
82 self.__priv_key = priv_key
84 self.__logger = logger
86 self.__private_key_file = ID_RSA_PATH
87 if not os.path.isfile(self.__private_key_file):
89 "Private key file '{}'".format(self.__private_key_file)
91 raise IOError("Private key file '{}' not found.".format(
92 self.__private_key_file))
94 # get list of available nodes
95 ssh, sftp = self.__open_sftp_session(
96 self.__host, self.__user, self.__passwd)
# Retry 'nova list' up to 10 times; transient failures right after
# deployment are expected. NOTE(review): attempt counter initialization
# and increment are on lines missing from this listing.
98 fuel_node_passed = False
100 while (attempt <= 10) and not fuel_node_passed:
101 stdin, stdout, stderr = ssh.exec_command(
102 "source stackrc; nova list")
103 stderr_lines = stderr.readlines()
105 self.__logger.warning(
106 "'Apex node' command failed (try {}):".format(attempt))
107 for line in stderr_lines:
108 self.__logger.debug(line.strip())
110 fuel_node_passed = True
113 "'Apex node' command passed (try {})".format(attempt))
115 if not fuel_node_passed:
117 "'Apex node' command failed. This was the last try.")
119 "'Apex node' command failed. This was the last try.")
120 node_table = stdout.readlines()\
122 # skip table title and parse table values
# Each table row is split on '|' into cells and wrapped in a Node;
# separator ('+') and blank lines are skipped.
124 for entry in node_table[3:]:
125 if entry[0] == '+' or entry[0] == '\n':
130 Node([str(x.strip(' \n')) for x in entry.split('|')]))
132 def get_controllers(self):
133 # Get list of controllers
134 print self.__nodes[0]._Node__ip
136 [node for node in self.__nodes if 'controller' in node.get_name()])
138 def get_computes(self):
139 # Get list of computes
141 [node for node in self.__nodes if 'compute' in node.get_name()])
147 def __open_sftp_session(self, host, user, passwd=None):
148 # Connect to given host.
149 """Keyword arguments:
150 host -- host to connect
152 passwd -- password to use
154 Return tuple of SSH and SFTP client instances.
157 ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy blindly trusts unknown host keys --
# acceptable only on a closed lab network.
158 ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
160 # try a direct access using password or private key
# Neither a password nor a key was supplied: fall back to loading the
# default private key file discovered in __init__.
161 if not passwd and not self.__priv_key:
163 self.__priv_key = paramiko.RSAKey.from_private_key_file(
164 self.__private_key_file)
166 # connect to the server
168 host, username=user, password=passwd, pkey=self.__priv_key)
169 sftp = ssh.open_sftp()
171 # return SFTP client instance
174 def get_plugin_interval(self, compute, plugin):
175 """Find the plugin interval in collectd configuration.
178 compute -- compute node instance
179 plugin -- plug-in name
181 If found, return interval value, otherwise the default value"""
182 default_interval = DEF_PLUGIN_INTERVAL
183 compute_name = compute.get_name()
184 nodes = get_apex_nodes()
# Locate the matching overcloud node and read the plugin's own conf
# file; an 'Interval' line inside it overrides the default.
186 if compute_name == node.get_dict()['name']:
187 stdout = node.run_cmd(
188 'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
# Conf file missing/unreadable: fall back to DEF_PLUGIN_INTERVAL.
190 return default_interval
191 for line in stdout.split('\n'):
192 if 'Interval' in line:
# No 'Interval' entry found anywhere in the file.
194 return default_interval
196 def get_plugin_config_values(self, compute, plugin, parameter):
197 """Get parameter values from collectd config file.
200 compute -- compute node instance
201 plugin -- plug-in name
202 parameter -- plug-in parameter
204 Return list of found values."""
# NOTE(review): despite the docstring, the visible branches return the
# raw remainder of the matched config line (a string), not a list, and
# 'parameter' is not consulted on any visible line -- confirm against
# the full file before relying on the documented contract.
206 compute_name = compute.get_name()
207 nodes = get_apex_nodes()
209 if compute_name == node.get_dict()['name']:
210 stdout = node.run_cmd(
211 'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
# Conf file unreadable: return the default (defined on a line not
# visible in this listing).
213 return default_values
214 for line in stdout.split('\n'):
# Return everything after the first space of the matching option line.
215 if 'Interfaces' in line:
216 return line.split(' ', 1)[1]
217 elif 'Bridges' in line:
218 return line.split(' ', 1)[1]
219 elif 'Cores' in line:
220 return line.split(' ', 1)[1]
223 return default_values
225 def execute_command(self, command, host_ip=None, ssh=None):
226 """Execute command on node and return list of lines of standard output.
230 host_ip -- IP of the node
231 ssh -- existing open SSH session to use
233 One of host_ip or ssh must not be None. If both are not None,
234 existing ssh session is used.
236 if host_ip is None and ssh is None:
237 raise ValueError('One of host_ip or ssh must not be None.')
# NOTE(review): hard-coded root/'opnfvapex' credentials; the session
# opened here is also never closed in the visible lines.
239 ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
240 stdin, stdout, stderr = ssh.exec_command(command)
241 return stdout.readlines()
243 def get_ovs_interfaces(self, compute):
244 """Get list of configured OVS interfaces
247 compute -- compute node instance
# NOTE(review): 'ovs-vsctl list-br' actually lists OVS *bridges*, not
# interfaces -- the method name/docstring and the command disagree;
# confirm which the callers expect before renaming either.
249 compute_name = compute.get_name()
250 nodes = get_apex_nodes()
252 if compute_name == node.get_dict()['name']:
253 stdout = node.run_cmd('sudo ovs-vsctl list-br')
256 def is_gnocchi_running(self, controller):
257 """Check whether Gnocchi is running on controller.
260 controller -- controller node instance
262 Return boolean value whether Gnocchi is running.
264 gnocchi_present = False
265 controller_name = controller.get_name()
266 nodes = get_apex_nodes()
268 if controller_name == node.get_dict()['name']:
# Ship the overcloud credentials file to the node, then grep the
# keystone service catalog for a gnocchi entry.
269 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
270 stdout = node.run_cmd(
271 "source overcloudrc.v3;"
272 + "openstack catalog list | grep gnocchi")
275 elif 'gnocchi' in stdout:
276 gnocchi_present = True
277 return gnocchi_present
# Controller name not matched by any node: report not running.
280 return gnocchi_present
282 def is_aodh_running(self, controller):
283 """Check whether aodh service is running on controller
# Mirrors is_gnocchi_running: copies the overcloud credentials to the
# matched controller and greps the service catalog for 'aodh'.
286 controller_name = controller.get_name()
287 nodes = get_apex_nodes()
289 if controller_name == node.get_dict()['name']:
290 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
291 stdout = node.run_cmd(
292 "source overcloudrc.v3;"
293 + "openstack catalog list | grep aodh")
296 elif 'aodh' in stdout:
303 def is_mcelog_installed(self, compute, package):
304 """Check whether package exists on compute node.
307 compute -- compute node instance
308 package -- Linux package to search for
310 Return boolean value whether package is installed.
# NOTE(review): the rpm query below hard-codes 'mcelog' and ignores the
# 'package' argument on all visible lines -- confirm intent.
312 compute_name = compute.get_name()
313 nodes = get_apex_nodes()
315 if compute_name == node.get_dict()['name']:
316 stdout = node.run_cmd(
317 'rpm -qa | grep mcelog')
320 elif 'mcelog' in stdout:
325 def is_libpqos_on_node(self, compute):
326 """Check whether libpqos is present on compute node"""
# Look for the libpqos shared library under /usr/local/lib on the
# matched compute node.
328 compute_name = compute.get_name()
329 nodes = get_apex_nodes()
331 if compute_name == node.get_dict()['name']:
332 stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
333 if 'libpqos' in stdout:
337 def check_aodh_plugin_included(self, compute):
338 """Check if aodh plugin is included in collectd.conf file.
339 If not, try to enable it.
342 compute -- compute node instance
344 Return boolean value whether AODH plugin is included
345 or it's enabling was successful.
347 compute_name = compute.get_name()
348 nodes = get_apex_nodes()
350 if compute_name == node.get_dict()['name']:
# Presence check: look for aodh.conf among the collectd drop-in files.
351 aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
352 if 'aodh.conf' not in aodh_conf:
354 "AODH Plugin not included in {}".format(compute_name))
358 "AODH plugin present in compute node {}" .format(
363 def check_gnocchi_plugin_included(self, compute):
364 """Check if gnocchi plugin is included in collectd.conf file.
365 If not, try to enable it.
368 compute -- compute node instance
370 Return boolean value whether gnocchi plugin is included
371 or it's enabling was successful.
373 compute_name = compute.get_name()
374 nodes = get_apex_nodes()
376 if compute_name == node.get_dict()['name']:
# Presence check: the gnocchi publisher ships as the
# collectd-ceilometer-plugin drop-in config.
377 gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
378 if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
380 "Gnocchi Plugin not included in node {}".format(
385 "Gnocchi plugin available in compute node {}" .format(
390 def check_snmp_plugin_included(self, compute):
391 """Check if SNMP plugin is active in compute node.
# Query the local SNMP agent on the matched compute node for the
# Intel RDT MIB subtree; a non-empty walk means the plugin is active.
393 snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
394 snmp_string = 'INTEL-RDT-MIB::intelRdt'
395 compute_name = compute.get_name()
396 nodes = get_apex_nodes()
398 if compute_name == node.get_dict()['name']:
399 stdout = node.run_cmd(
400 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
401 snmp_mib, snmp_string))
402 self.__logger.info("snmp output = {}" .format(stdout))
409 self, compute, plugins, error_plugins, create_backup=True):
410 """Enable plugins on compute node
413 compute -- compute node instance
414 plugins -- list of plugins to be enabled
416 Return boolean value indicating whether function was successful.
# Ship the bundled csv.conf (located next to this module) to the node's
# collectd drop-in directory. 'error_plugins' and 'create_backup' are
# consumed on lines not visible in this listing.
418 csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
419 plugins = sorted(plugins)
420 compute_name = compute.get_name()
421 nodes = get_apex_nodes()
423 if compute_name == node.get_dict()['name']:
424 node.put_file(csv_file, 'csv.conf')
427 + '/etc/collectd/collectd.conf.d/csv.conf')
430 def restart_collectd(self, compute):
431 """Restart collectd on compute node.
434 compute -- compute node instance
436 Return tuple with boolean indicating success and list of warnings
437 received during collectd start.
439 compute_name = compute.get_name()
440 nodes = get_apex_nodes()
442 def get_collectd_processes(compute_node):
443 """Get number of running collectd processes.
446 ssh_session -- instance of SSH session in which to check
449 stdout = compute_node.run_cmd("pgrep collectd")
453 if compute_name == node.get_dict()['name']:
454 # node.run_cmd('su; "opnfvapex"')
# Stop collectd, verify it is gone, then start it again and collect any
# 'WARN: ' lines emitted during startup.
455 self.__logger.info('Stopping collectd service...')
456 node.run_cmd('sudo systemctl stop collectd')
458 if get_collectd_processes(node):
459 self.__logger.error('Collectd is still running...')
461 self.__logger.info('Starting collectd service...')
462 stdout = node.run_cmd('sudo systemctl start collectd')
465 output.strip() for output in stdout if 'WARN: ' in output]
# Startup did not leave a running daemon: report failure with warnings.
466 if get_collectd_processes(node) == 0:
467 self.__logger.error('Collectd is still not running...')
468 return False, warning
471 def test_plugins_with_aodh(
472 self, compute, plugin_interval, logger,
# Verify that AODH alarms matching 'criteria_list' for the given compute
# are being refreshed: read an alarm's timestamp, wait, read it again,
# and fail when the two timestamps are identical. Remaining parameters
# of the signature are on lines missing from this listing.
478 nodes = get_apex_nodes()
480 if node.is_controller():
481 self.__logger.info('Getting AODH Alarm list on {}' .format(
482 (node.get_dict()['name'])))
483 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
484 stdout = node.run_cmd(
485 "source overcloudrc.v3;"
486 + "aodh alarm list | grep {0} | grep {1}"
487 .format(criteria_list, compute))
489 self.__logger.info("aodh alarm list was empty")
491 for line in stdout.splitlines():
# Alarm id is the first column of the table row once the '|' borders
# are stripped.
492 line = line.replace('|', "")
493 metric_id = line.split()[0]
494 stdout = node.run_cmd(
495 'source overcloudrc.v3; aodh alarm show {}' .format(
498 self.__logger.info("aodh alarm list was empty")
# First timestamp sample (rows 3..-2 of 'aodh alarm show' output).
500 for line in stdout.splitlines()[3: -1]:
501 line = line.replace('|', "")
502 if line.split()[0] == 'timestamp':
503 timestamps1 = line.split()[1]
507 stdout = node.run_cmd(
508 "source overcloudrc.v3; aodh alarm show {}" .format(
511 self.__logger.info("aodh alarm list was empty")
# Second timestamp sample, taken after the wait.
513 for line in stdout.splitlines()[3:-1]:
514 line = line.replace('|', "")
515 if line.split()[0] == 'timestamp':
516 timestamps2 = line.split()[1]
# Unchanged timestamp means the plugin did not publish new data.
519 if timestamps1 == timestamps2:
521 "Data not updated after interval of 12 seconds")
524 self.__logger.info("PASS")
527 def test_plugins_with_gnocchi(
528 self, compute, plugin_interval, logger,
# Same refresh check as test_plugins_with_aodh but against gnocchi
# measures: sample a metric's newest timestamp twice, separated by
# plugin_interval + 2 seconds, and fail when it did not advance.
534 nodes = get_apex_nodes()
535 sleep_time = plugin_interval + 2
537 if node.is_controller():
538 self.__logger.info('Getting gnocchi metric list on {}' .format(
539 (node.get_dict()['name'])))
540 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
541 stdout = node.run_cmd(
542 "source overcloudrc.v3;"
543 + "gnocchi metric list | grep {0} | grep {1}"
544 .format(criteria_list, compute))
546 self.__logger.info("gnocchi list was empty")
548 for line in stdout.splitlines():
549 line = line.replace('|', "")
550 metric_id = line.split()[0]
551 stdout = node.run_cmd(
552 'source overcloudrc.v3;gnocchi measures show {}'.format(
555 self.__logger.info("gnocchi list was empty")
# First timestamp sample from the measures table.
557 for line in stdout.splitlines()[3: -1]:
561 timestamps1 = line.replace('|', "")
562 timestamps1 = timestamps1.split()[0]
563 time.sleep(sleep_time)
564 stdout = node.run_cmd(
565 "source overcloudrc.v3;gnocchi measures show {}".format(
568 self.__logger.info("gnocchi measures was empty")
# Second sample after waiting one plugin interval (+2s slack).
570 for line in stdout.splitlines()[3:-1]:
574 timestamps2 = line.replace('|', "")
575 timestamps2 = timestamps2.split()[0]
576 if timestamps1 == timestamps2:
578 "Plugin Interval is {}" .format(plugin_interval))
580 "Data not updated after {} seconds".format(
584 self.__logger.info("PASS")
588 def test_plugins_with_snmp(
# NOTE(review): mutable default arguments ([]) are shared across calls;
# safe only while callers never mutate them -- prefer None defaults.
589 self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
590 snmp_mib_strings=[], snmp_in_commands=[]):
# BUG(flagged, not fixed here): this condition is always true -- the
# bare string literals 'intel_rdt' / 'mcelog' are truthy, so the test
# runs for every plugin. Intended:
# plugin in ('hugepages', 'intel_rdt', 'mcelog').
592 if plugin == 'hugepages' or 'intel_rdt' or 'mcelog':
593 nodes = get_apex_nodes()
595 if compute == node.get_dict()['name']:
# Walk the plugin's MIB subtree twice (with a pause) and require the
# sampled counter to change, proving collectd feeds the SNMP agent.
596 stdout = node.run_cmd(
597 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
598 snmp_mib_files, snmp_mib_strings))
599 self.__logger.info("{}" .format(stdout))
601 self.__logger.info("No output from snmpwalk")
# 'OID' in the output marks an snmpwalk lookup error.
603 elif 'OID' in stdout:
604 self.__logger.info("SNMP query failed")
607 counter1 = stdout.split()[3]
609 stdout = node.run_cmd(
610 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
611 snmp_mib_files, snmp_mib_strings))
612 self.__logger.info("{}" .format(stdout))
614 self.__logger.info("No output from snmpwalk")
615 elif 'OID' in stdout:
617 "SNMP query failed during second check")
618 self.__logger.info("waiting for 10 sec")
620 stdout = node.run_cmd(
621 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
622 snmp_mib_files, snmp_mib_strings))
623 self.__logger.info("{}" .format(stdout))
625 self.__logger.info("No output from snmpwalk")
626 elif 'OID' in stdout:
627 self.__logger.info("SNMP query failed again")
628 self.__logger.info("Failing this test case")
631 counter2 = stdout.split()[3]
# Equal counters after the wait means no new samples were recorded.
633 if counter1 == counter2: