1 # -*- coding: utf-8 -*-
3 # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 # not use this file except in compliance with the License. You may obtain
5 # a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 # License for the specific language governing permissions and limitations
15 """Classes used by collectd.py"""
22 from opnfv.deployment import factory
# Path to the undercloud private key used for SSH to overcloud nodes.
ID_RSA_PATH = '/root/.ssh/id_rsa'
# Helper script that distributes SSH keys to the barometer test nodes.
SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
# Default collectd plugin sampling interval, in seconds, used when no
# explicit Interval is found in the plugin's config file.
DEF_PLUGIN_INTERVAL = 10
# Main collectd configuration file on the target nodes.
COLLECTD_CONF = '/etc/collectd.conf'
# Directory holding the per-plugin collectd configuration snippets.
COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
# File where the notification dump script writes received notifications.
NOTIFICATION_FILE = '/var/log/python-notifications.dump'
# Script deployed on nodes to dump collectd notifications.
COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
# IP of the installer (undercloud) machine, taken from the environment.
# NOTE(review): os.getenv returns None when INSTALLER_IP is unset, which
# would make .rstrip raise AttributeError at import time — verify the
# variable is always exported before this module is imported.
APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
# SSH user on the Apex installer node.
APEX_USER_STACK = 'stack'
# Private key used when connecting to the Apex installer.
APEX_PKEY = '/root/.ssh/id_rsa'
37 """Node configuration class"""
    def __init__(self, attrs):
        """Initialize node fields from one parsed row of the node table.

        Keyword arguments:
        attrs -- list of column values split from one 'nova list' table row
        """
        self.__null = attrs[0]  # empty cell before the first '|' separator
        self.__name = attrs[2]  # node name column
        # Status column; empty string is normalized to None.
        self.__status = attrs[3] if attrs[3] else None
        self.__taskState = attrs[4]  # task-state column
        self.__pwrState = attrs[5]   # power-state column
        # Networks column looks like 'ctlplane=192.0.2.10'; strip the
        # '<name>=' prefix so only the IP address is kept.
        self.__ip = re.sub('^[a-z]+=', '', attrs[6])
56 """Get node IP address"""
65 handler = factory.Factory.get_handler('apex',
69 nodes = handler.get_nodes()
class ConfigServer(object):
    """Queries and manipulates the OPNFV deployment environment.

    Connects to the installer node over SSH, discovers controller and
    compute nodes, and provides helpers to inspect and control collectd
    and related OpenStack telemetry services on those nodes.
    """
    def __init__(self, host, user, logger, priv_key=None):
        """Open an SSH session to the installer and build the node list.

        Keyword arguments:
        host -- installer (undercloud) host to connect to
        user -- SSH user name
        logger -- logger instance used for all diagnostics
        priv_key -- optional private key for authentication

        Raises IOError when the fallback private key file is missing.
        """
        self.__priv_key = priv_key
        self.__logger = logger
        # Fall back to the well-known undercloud key location.
        self.__private_key_file = ID_RSA_PATH
        if not os.path.isfile(self.__private_key_file):
            # NOTE(review): this bare string expression is a no-op as
            # written — it looks like the message argument of a logger
            # call; confirm against the full file.
            "Private key file '{}'".format(self.__private_key_file)
            raise IOError("Private key file '{}' not found.".format(
                self.__private_key_file))
        # get list of available nodes
        ssh, sftp = self.__open_sftp_session(
            self.__host, self.__user, self.__passwd)
        fuel_node_passed = False
        # Retry 'nova list' up to 10 times before giving up.
        while (attempt <= 10) and not fuel_node_passed:
            stdin, stdout, stderr = ssh.exec_command(
                "source stackrc; nova list")
            stderr_lines = stderr.readlines()
                # Anything on stderr counts as a failed attempt; log it.
                self.__logger.warning(
                    "'Apex node' command failed (try {}):".format(attempt))
                for line in stderr_lines:
                    self.__logger.debug(line.strip())
                fuel_node_passed = True
                    "'Apex node' command passed (try {})".format(attempt))
        if not fuel_node_passed:
                "'Apex node' command failed. This was the last try.")
                "'Apex node' command failed. This was the last try.")
        node_table = stdout.readlines()\

        # skip table title and parse table values
        for entry in node_table[3:]:
            # Skip the ASCII-art border lines and blank lines of the table.
            if entry[0] == '+' or entry[0] == '\n':
                    Node([str(x.strip(' \n')) for x in entry.split('|')]))
    def get_controllers(self):
        # Get list of controllers
        # Return the nodes whose name contains 'controller'.
        # NOTE(review): Python 2 print statement; dumps the first node's
        # private IP for debugging — consider removing or using the logger.
        print self.__nodes[0]._Node__ip
            [node for node in self.__nodes if 'controller' in node.get_name()])
    def get_computes(self):
        # Get list of computes
        # Return the nodes whose name contains 'compute'.
            [node for node in self.__nodes if 'compute' in node.get_name()])
    def __open_sftp_session(self, host, user, passwd=None):
        # Connect to given host.
        """Open SSH and SFTP sessions to a host.

        Keyword arguments:
        host -- host to connect
        user -- user name to authenticate as
        passwd -- password to use; when neither password nor private key
                  was supplied, the default key file is loaded instead

        Return tuple of SSH and SFTP client instances.
        """
        ssh = paramiko.SSHClient()
        # Accept unknown host keys automatically (test environment only).
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # try a direct access using password or private key
        if not passwd and not self.__priv_key:
            # No credentials given: load the default private key file.
            self.__priv_key = paramiko.RSAKey.from_private_key_file(
                self.__private_key_file)

        # connect to the server
            host, username=user, password=passwd, pkey=self.__priv_key)
        sftp = ssh.open_sftp()

        # return SFTP client instance
    def get_plugin_interval(self, compute, plugin):
        """Find the plugin interval in collectd configuration.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name

        If found, return interval value, otherwise the default value"""
        default_interval = DEF_PLUGIN_INTERVAL
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            # Only inspect the node matching the requested compute.
            if compute_name == node.get_dict()['name']:
                # Read the plugin's collectd config snippet from the node.
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                    return default_interval
                # Scan the config for an explicit Interval setting.
                for line in stdout.split('\n'):
                    if 'Interval' in line:
        return default_interval
    def get_plugin_config_values(self, compute, plugin, parameter):
        """Get parameter values from collectd config file.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name
        parameter -- plug-in parameter

        Return list of found values."""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # Read the plugin's collectd config snippet from the node.
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
                    return default_values
                # NOTE(review): matches fixed keywords (Interfaces/Bridges/
                # Cores) rather than the 'parameter' argument itself —
                # confirm this is intended.
                for line in stdout.split('\n'):
                    if 'Interfaces' in line:
                        return line.split(' ', 1)[1]
                    elif 'Bridges' in line:
                        return line.split(' ', 1)[1]
                    elif 'Cores' in line:
                        return line.split(' ', 1)[1]
        return default_values
    def execute_command(self, command, host_ip=None, ssh=None):
        """Execute command on node and return list of lines of standard output.

        Keyword arguments:
        command -- command to execute
        host_ip -- IP of the node
        ssh -- existing open SSH session to use

        One of host_ip or ssh must not be None. If both are not None,
        existing ssh session is used.
        """
        if host_ip is None and ssh is None:
            raise ValueError('One of host_ip or ssh must not be None.')
            # No session supplied: open a fresh one.
            # NOTE(review): hard-coded root/opnfvapex credentials.
            ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
        stdin, stdout, stderr = ssh.exec_command(command)
        return stdout.readlines()
    def get_ovs_interfaces(self, compute):
        """Get list of configured OVS interfaces

        Keyword arguments:
        compute -- compute node instance
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # List the OVS bridges configured on the matching node.
                stdout = node.run_cmd('sudo ovs-vsctl list-br')
    def is_gnocchi_running(self, controller):
        """Check whether Gnocchi is running on controller.

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether Gnocchi is running.
        """
        gnocchi_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
            if controller_name == node.get_dict()['name']:
                # Overcloud credentials file copied/used on the node.
                '/home/opnfv/functest/conf/openstack.creds',
                # Source overcloud credentials and look for gnocchi in the
                # OpenStack service catalog.
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep gnocchi")
                elif 'gnocchi' in stdout:
                    gnocchi_present = True
                    return gnocchi_present
        return gnocchi_present
    def is_aodh_running(self, controller):
        """Check whether aodh service is running on controller

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether aodh is running.
        """
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
            if controller_name == node.get_dict()['name']:
                # Overcloud credentials file copied/used on the node.
                '/home/opnfv/functest/conf/openstack.creds',
                # Source overcloud credentials and look for aodh in the
                # OpenStack service catalog.
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep aodh")
                elif 'aodh' in stdout:
    def is_mcelog_installed(self, compute, package):
        """Check whether package exists on compute node.

        Keyword arguments:
        compute -- compute node instance
        package -- Linux package to search for

        Return boolean value whether package is installed.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # NOTE(review): greps for the literal 'mcelog' — the
                # 'package' argument is not used in the query.
                stdout = node.run_cmd(
                    'rpm -qa | grep mcelog')
                elif 'mcelog' in stdout:
    def is_libpqos_on_node(self, compute):
        """Check whether libpqos is present on compute node"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # Look for the libpqos shared library under /usr/local/lib.
                stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
                if 'libpqos' in stdout:
    def check_aodh_plugin_included(self, compute):
        """Check if aodh plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether AODH plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # List the collectd config snippets present on the node.
                aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'aodh.conf' not in aodh_conf:
                    "AODH Plugin not included in {}".format(compute_name))
                    "AODH plugin present in compute node {}" .format(
    def check_gnocchi_plugin_included(self, compute):
        """Check if gnocchi plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether gnocchi plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # List the collectd config snippets present on the node.
                gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
                    "Gnocchi Plugin not included in node {}".format(
                    "Gnocchi plugin available in compute node {}" .format(
    def check_snmp_plugin_included(self, compute):
        """Check if SNMP plugin is active in compute node.

        Keyword arguments:
        compute -- compute node instance

        Queries the Intel RDT MIB over SNMP on the node.
        """
        snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'  # MIB definition file
        snmp_string = 'INTEL-RDT-MIB::intelRdt'          # OID subtree to walk
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # Walk the Intel RDT subtree via the local SNMP agent.
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                        snmp_mib, snmp_string))
                self.__logger.info("snmp output = {}" .format(stdout))
410 self, compute, plugins, error_plugins, create_backup=True):
411 """Enable plugins on compute node
414 compute -- compute node instance
415 plugins -- list of plugins to be enabled
417 Return boolean value indicating whether function was successful.
419 csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
420 plugins = sorted(plugins)
421 compute_name = compute.get_name()
422 nodes = get_apex_nodes()
424 if compute_name == node.get_dict()['name']:
425 node.put_file(csv_file, 'csv.conf')
428 + '/etc/collectd/collectd.conf.d/csv.conf')
    def restart_collectd(self, compute):
        """Restart collectd on compute node.

        Keyword arguments:
        compute -- compute node instance

        Return tuple with boolean indicating success and list of warnings
        received during collectd start.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()

        def get_collectd_processes(compute_node):
            """Get number of running collectd processes.

            Keyword arguments:
            compute_node -- node on which to count collectd processes
            """
            stdout = compute_node.run_cmd("pgrep collectd")
            if compute_name == node.get_dict()['name']:
                # node.run_cmd('su; "opnfvapex"')
                self.__logger.info('Stopping collectd service...')
                node.run_cmd('sudo systemctl stop collectd')
                if get_collectd_processes(node):
                    self.__logger.error('Collectd is still running...')
                self.__logger.info('Starting collectd service...')
                stdout = node.run_cmd('sudo systemctl start collectd')
                # Collect any WARN lines emitted during startup.
                    output.strip() for output in stdout if 'WARN: ' in output]
                if get_collectd_processes(node) == 0:
                    self.__logger.error('Collectd is still not running...')
                    return False, warning
    # Verify that a plugin's measurements reach AODH: list the matching
    # alarms on a controller, read a timestamp, wait, re-read, and fail
    # if the timestamp did not advance within the plugin interval.
    def test_plugins_with_aodh(
            self, compute, plugin_interval, logger,
        nodes = get_apex_nodes()
            # Only controllers expose the AODH API.
            if node.is_controller():
                self.__logger.info('Getting AODH Alarm list on {}' .format(
                    (node.get_dict()['name'])))
                # Overcloud credentials file copied/used on the node.
                '/home/opnfv/functest/conf/openstack.creds',
                # Find alarms matching both the criteria and the compute.
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "aodh alarm list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                    self.__logger.info("aodh alarm list was empty")
                for line in stdout.splitlines():
                    # Strip the ASCII-table separators; first field is the id.
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3; aodh alarm show {}' .format(
                        self.__logger.info("aodh alarm list was empty")
                    # Skip the table header/footer rows of 'alarm show'.
                    for line in stdout.splitlines()[3: -1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps1 = line.split()[1]
                    # Second read after the wait, for comparison.
                    stdout = node.run_cmd(
                        "source overcloudrc.v3; aodh alarm show {}" .format(
                        self.__logger.info("aodh alarm list was empty")
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps2 = line.split()[1]
                    # Identical timestamps mean no new data arrived.
                    if timestamps1 == timestamps2:
                        "Data not updated after interval of 12 seconds")
                        self.__logger.info("PASS")
    # Verify that a plugin's measurements reach Gnocchi: list matching
    # metrics on a controller, read the latest measure timestamp, wait
    # one interval (plus slack), re-read, and fail if nothing changed.
    def test_plugins_with_gnocchi(
            self, compute, plugin_interval, logger,
        nodes = get_apex_nodes()
        # Allow 2 extra seconds beyond the plugin interval for delivery.
        sleep_time = plugin_interval + 2
            # Only controllers expose the Gnocchi API.
            if node.is_controller():
                self.__logger.info('Getting gnocchi metric list on {}' .format(
                    (node.get_dict()['name'])))
                # Overcloud credentials file copied/used on the node.
                '/home/opnfv/functest/conf/openstack.creds',
                # Find metrics matching both the criteria and the compute.
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "gnocchi metric list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                    self.__logger.info("gnocchi list was empty")
                for line in stdout.splitlines():
                    # Strip the ASCII-table separators; first field is the id.
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3;gnocchi measures show {}'.format(
                        self.__logger.info("gnocchi list was empty")
                    # Skip the table header/footer rows of 'measures show'.
                    for line in stdout.splitlines()[3: -1]:
                        timestamps1 = line.replace('|', "")
                        timestamps1 = timestamps1.split()[0]
                    time.sleep(sleep_time)
                    # Second read after the wait, for comparison.
                    stdout = node.run_cmd(
                        "source overcloudrc.v3;gnocchi measures show {}".format(
                        self.__logger.info("gnocchi measures was empty")
                    for line in stdout.splitlines()[3:-1]:
                        timestamps2 = line.replace('|', "")
                        timestamps2 = timestamps2.split()[0]
                    # Identical timestamps mean no new measures arrived.
                    if timestamps1 == timestamps2:
                        "Plugin Interval is {}" .format(plugin_interval))
                        "Data not updated after {} seconds".format(
                        self.__logger.info("PASS")
593 def test_plugins_with_snmp(
594 self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
595 snmp_mib_strings=[], snmp_in_commands=[]):
597 if plugin == 'hugepages' or 'intel_rdt' or 'mcelog':
598 nodes = get_apex_nodes()
600 if compute == node.get_dict()['name']:
601 stdout = node.run_cmd(
602 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
603 snmp_mib_files, snmp_mib_strings))
604 self.__logger.info("{}" .format(stdout))
606 self.__logger.info("No output from snmpwalk")
608 elif 'OID' in stdout:
609 self.__logger.info("SNMP query failed")
612 counter1 = stdout.split()[3]
614 stdout = node.run_cmd(
615 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
616 snmp_mib_files, snmp_mib_strings))
617 self.__logger.info("{}" .format(stdout))
619 self.__logger.info("No output from snmpwalk")
620 elif 'OID' in stdout:
622 "SNMP query failed during second check")
623 self.__logger.info("waiting for 10 sec")
625 stdout = node.run_cmd(
626 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
627 snmp_mib_files, snmp_mib_strings))
628 self.__logger.info("{}" .format(stdout))
630 self.__logger.info("No output from snmpwalk")
631 elif 'OID' in stdout:
632 self.__logger.info("SNMP query failed again")
633 self.__logger.info("Failing this test case")
636 counter2 = stdout.split()[3]
638 if counter1 == counter2: