1 # -*- coding: utf-8 -*-
5 # Licensed under the Apache License, Version 2.0 (the "License"); you may
6 # not use this file except in compliance with the License. You may obtain
7 # a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 # License for the specific language governing permissions and limitations
17 """Classes used by collectd.py"""
25 from opnfv.deployment import factory
27 from functest.utils import constants
# Paths and constants shared by the barometer test helpers below.
ID_RSA_PATH = '/root/.ssh/id_rsa'  # private key used for SSH to the installer
SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
DEF_PLUGIN_INTERVAL = 10  # fallback collectd plug-in interval in seconds
COLLECTD_CONF = '/etc/collectd.conf'
COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
NOTIFICATION_FILE = '/var/log/python-notifications.dump'
COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
# NOTE(review): os.getenv returns None when INSTALLER_IP is unset, which makes
# .rstrip raise AttributeError at import time -- confirm the env var is
# guaranteed by the caller environment.
APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
APEX_USER_STACK = 'stack'
APEX_PKEY = '/root/.ssh/id_rsa'
TEST_VM_IMAGE = 'cirros-0.4.0-x86_64-disk.img'
TEST_VM_IMAGE_PATH = '/home/opnfv/functest/images/' + TEST_VM_IMAGE
45 """Node configuration class"""
    def __init__(self, attrs):
        """Build a node record from one parsed row of `nova list` output.

        Keyword arguments:
        attrs -- list of column values split from a single table row
        """
        self.__null = attrs[0]  # empty leading cell of the '|'-separated row
        self.__name = attrs[2]
        # normalise empty status strings to None
        self.__status = attrs[3] if attrs[3] else None
        self.__taskState = attrs[4]
        self.__pwrState = attrs[5]
        # drop the "<network>=" prefix, keeping only the IP address
        self.__ip = re.sub('^[a-z]+=', '', attrs[6])
64 """Get node IP address"""
73 handler = factory.Factory.get_handler('apex',
77 nodes = handler.get_nodes()
81 class ConfigServer(object):
82 """Class to get env configuration"""
    def __init__(self, host, user, logger, priv_key=None):
        """Connect to the installer host and build the list of overcloud nodes.

        Opens an SSH session, runs "nova list" (retried up to 10 times) and
        parses the resulting table into Node objects.

        Keyword arguments:
        host -- installer (undercloud) host to connect to
        user -- SSH user name
        logger -- logger instance used for all diagnostics
        priv_key -- optional private key; default is the key file on disk

        Raises IOError when the private key file is missing.
        """
        self.__priv_key = priv_key
        self.__logger = logger
        self.__private_key_file = ID_RSA_PATH
        if not os.path.isfile(self.__private_key_file):
                "Private key file '{}'".format(self.__private_key_file)
            # fail fast: without the key no node can be reached
            raise IOError("Private key file '{}' not found.".format(
                self.__private_key_file))
        # get list of available nodes
        ssh, sftp = self.__open_sftp_session(
            self.__host, self.__user, self.__passwd)
        fuel_node_passed = False
        # retry "nova list" until it succeeds or attempts are exhausted
        while (attempt <= 10) and not fuel_node_passed:
            stdin, stdout, stderr = ssh.exec_command(
                "source stackrc; nova list")
            stderr_lines = stderr.readlines()
                self.__logger.warning(
                    "'Apex node' command failed (try {}):".format(attempt))
                for line in stderr_lines:
                    self.__logger.debug(line.strip())
                fuel_node_passed = True
                    "'Apex node' command passed (try {})".format(attempt))
        if not fuel_node_passed:
                "'Apex node' command failed. This was the last try.")
                "'Apex node' command failed. This was the last try.")
        node_table = stdout.readlines()\

        # skip table title and parse table values
        for entry in node_table[3:]:
            if entry[0] == '+' or entry[0] == '\n':
                Node([str(x.strip(' \n')) for x in entry.split('|')]))
    def get_controllers(self):
        # Get list of controllers
        # NOTE(review): Python 2 print statement, and it reaches into the
        # name-mangled Node attribute (_Node__ip) -- looks like a debug
        # leftover; consider removing.
        print self.__nodes[0]._Node__ip
            [node for node in self.__nodes if 'controller' in node.get_name()]
    def get_computes(self):
        # Get list of computes
        # Select the nodes whose name contains 'compute'.
            [node for node in self.__nodes if 'compute' in node.get_name()])
    def __open_sftp_session(self, host, user, passwd=None):
        # Connect to given host.
        """Open SSH and SFTP sessions to the given host.

        Keyword arguments:
        host -- host to connect
        user -- user name for the connection
        passwd -- password to use

        Return tuple of SSH and SFTP client instances.
        """
        ssh = paramiko.SSHClient()
        # auto-accept unknown host keys -- acceptable only in this lab setup
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        # try a direct access using password or private key
        if not passwd and not self.__priv_key:
            # no credentials supplied: fall back to the key file on disk
            self.__priv_key = paramiko.RSAKey.from_private_key_file(
                self.__private_key_file)
        # connect to the server
            host, username=user, password=passwd, pkey=self.__priv_key)
        sftp = ssh.open_sftp()
        # return SFTP client instance
    def get_plugin_interval(self, compute, plugin):
        """Find the plugin interval in collectd configuration.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name

        If found, return interval value, otherwise the default value"""
        default_interval = DEF_PLUGIN_INTERVAL
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # read the plug-in's dedicated collectd config fragment
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                    return default_interval
                for line in stdout.split('\n'):
                    if 'Interval' in line:
        # no matching node or no Interval directive found
        return default_interval
    def get_plugin_config_values(self, compute, plugin, parameter):
        """Get parameter values from collectd config file.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name
        parameter -- plug-in parameter

        Return list of found values."""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
                    return default_values
                # NOTE(review): only Interfaces/Bridges/Cores are recognised;
                # the 'parameter' argument itself is not consulted here.
                for line in stdout.split('\n'):
                    if 'Interfaces' in line:
                        return line.split(' ', 1)[1]
                    elif 'Bridges' in line:
                        return line.split(' ', 1)[1]
                    elif 'Cores' in line:
                        return line.split(' ', 1)[1]
        return default_values
    def execute_command(self, command, host_ip=None, ssh=None):
        """Execute command on node and return list of lines of standard output.

        Keyword arguments:
        command -- command to execute
        host_ip -- IP of the node
        ssh -- existing open SSH session to use

        One of host_ip or ssh must not be None. If both are not None,
        existing ssh session is used.
        """
        if host_ip is None and ssh is None:
            raise ValueError('One of host_ip or ssh must not be None.')
            # hard-coded overcloud root credentials for the apex lab setup
            ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
        stdin, stdout, stderr = ssh.exec_command(command)
        return stdout.readlines()
    def get_ovs_interfaces(self, compute):
        """Get list of configured OVS interfaces

        Keyword arguments:
        compute -- compute node instance
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # list the OVS bridges configured on the matching compute node
                stdout = node.run_cmd('sudo ovs-vsctl list-br')
    def is_gnocchi_running(self, controller):
        """Check whether Gnocchi is running on controller.

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether Gnocchi is running.
        """
        gnocchi_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
            if controller_name == node.get_dict()['name']:
                # copy overcloud credentials so the CLI can be sourced remotely
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep gnocchi")
                elif 'gnocchi' in stdout:
                    gnocchi_present = True
                    return gnocchi_present
        # controller not found or service absent from the catalog
        return gnocchi_present
    def is_aodh_running(self, controller):
        """Check whether aodh service is running on controller.

        Keyword arguments:
        controller -- controller node instance
        """
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
            if controller_name == node.get_dict()['name']:
                # copy overcloud credentials so the CLI can be sourced remotely
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep aodh")
                elif 'aodh' in stdout:
    def is_redis_running(self, compute):
        """Check whether redis service is running on compute"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # redis runs inside a docker container named barometer-redis
                stdout = node.run_cmd('sudo systemctl status docker'
                                      '| grep barometer-redis')
                if stdout and 'barometer-redis' in stdout:
                        'Redis is running in node {}'.format(
                        'Redis is *not* running in node {}'.format(
    def is_dma_server_running(self, compute):
        """Check whether DMA server is running on compute"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # DMA runs inside the opnfv/barometer-dma docker container;
                # the '/server' substring distinguishes the server process
                stdout = node.run_cmd('sudo systemctl status docker'
                                      '| grep opnfv/barometer-dma')
                if stdout and '/server' in stdout:
                        'DMA Server is running in node {}'.format(
                        'DMA Server is *not* running in node {}'.format(
    def is_dma_infofetch_running(self, compute):
        """Check whether DMA infofetch is running on compute"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # same container as the DMA server; the '/infofetch' substring
                # distinguishes the infofetch process
                stdout = node.run_cmd('sudo systemctl status docker'
                                      '| grep opnfv/barometer-dma')
                if stdout and '/infofetch' in stdout:
                        'DMA InfoFetch is running in node {}'.format(
                        'DMA InfoFetch is *not* running in node {}'.format(
    def get_dma_config(self, compute):
        """Get config values of DMA"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # We use following after functest accept python-toml
                #     stdout = node.run_cmd(
                #         'cat /etc/barometer-dma/config.toml')
                #         agent_conf = toml.loads(stdout)
                #     except (TypeError, TomlDecodeError) as e:
                #         self.__logger.error(
                #             'DMA config error: {}'.format(e))
                # poor man's TOML reader: grep the keys of interest and turn
                # "key = value" into "key: value" so YAML can parse it
                    'egrep "listen_port|amqp_"'
                    ' /etc/barometer-dma/config.toml'
                    '| sed -e "s/#.*$//" | sed -e "s/=/:/"'
                stdout = node.run_cmd(readcmd)
                agent_conf = {"server": yaml.safe_load(stdout)}
                # resolve the amqp host name to an IP by parsing ping output
                    'ping -n -c1 ' + agent_conf["server"]["amqp_host"] +
                    '| sed -ne "s/^.*bytes from //p" | sed -e "s/:.*//"'
                agent_conf["server"]["amqp_host"] = node.run_cmd(pingcmd)
    def is_mcelog_installed(self, compute, package):
        """Check whether package exists on compute node.

        Keyword arguments:
        compute -- compute node instance
        package -- Linux package to search for

        Return boolean value whether package is installed.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # NOTE(review): the 'package' argument is ignored here; the
                # query is hard-coded to mcelog.
                stdout = node.run_cmd(
                    'rpm -qa | grep mcelog')
                elif 'mcelog' in stdout:
    def is_rdt_available(self, compute):
        """Check whether the compute node is a virtual machine."""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # a 'hypervisor' CPU flag means we run virtualised, so the
                # RDT hardware feature is not available
                stdout = node.run_cmd('cat /proc/cpuinfo | grep hypervisor')
                if 'hypervisor' in stdout:
    def is_libpqos_on_node(self, compute):
        """Check whether libpqos is present on compute node"""
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # look for the libpqos shared library under /usr/local/lib
                stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
                if 'libpqos' in stdout:
    def check_aodh_plugin_included(self, compute):
        """Check if aodh plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether AODH plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # presence of aodh.conf in the collectd drop-in dir means
                # the plug-in is enabled
                aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'aodh.conf' not in aodh_conf:
                        "AODH Plugin not included in {}".format(compute_name))
                        "AODH plugin present in compute node {}" .format(
    def check_gnocchi_plugin_included(self, compute):
        """Check if gnocchi plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether gnocchi plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # gnocchi publishing goes through the ceilometer plug-in conf
                gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
                        "Gnocchi Plugin not included in node {}".format(
                        "Gnocchi plugin available in compute node {}" .format(
    def check_snmp_plugin_included(self, compute):
        """Check if SNMP plugin is active in compute node.
        """
        snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
        snmp_string = 'INTEL-RDT-MIB::intelRdt'
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # query the local snmp agent for the Intel RDT MIB subtree
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                        snmp_mib, snmp_string))
                self.__logger.info("snmp output = {}" .format(stdout))
            self, compute, plugins, error_plugins, create_backup=True):
        """Enable plugins on compute node

        Keyword arguments:
        compute -- compute node instance
        plugins -- list of plugins to be enabled

        Return boolean value indicating whether function was successful.
        """
        # ship the bundled csv.conf next to this module to the node
        csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
        plugins = sorted(plugins)
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                node.put_file(csv_file, 'csv.conf')
                    + '/etc/collectd/collectd.conf.d/csv.conf')
    def restart_collectd(self, compute):
        """Restart collectd on compute node.

        Keyword arguments:
        compute -- compute node instance

        Return tuple with boolean indicating success and list of warnings
        received during collectd start.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()

        def get_collectd_processes(compute_node):
            """Get number of running collectd processes.

            Keyword arguments:
            compute_node -- node on which to check for collectd processes
            """
            stdout = compute_node.run_cmd("pgrep collectd")

            if compute_name == node.get_dict()['name']:
                # node.run_cmd('su; "opnfvapex"')
                self.__logger.info('Stopping collectd service...')
                node.run_cmd('sudo systemctl stop collectd')
                if get_collectd_processes(node):
                    # stop did not take effect -- abort
                    self.__logger.error('Collectd is still running...')
                self.__logger.info('Starting collectd service...')
                stdout = node.run_cmd('sudo systemctl start collectd')
                # collect WARN lines emitted during startup
                    output.strip() for output in stdout if 'WARN: ' in output]
                if get_collectd_processes(node) == 0:
                    self.__logger.error('Collectd is still not running...')
                    return False, warning
578 def trigger_alarm_update(self, alarm, compute_node):
579 # TODO: move these actions to main, with criteria lists so that we can reference that
580 # i.e. test_plugin_with_aodh(self, compute, plugin.., logger, criteria_list, alarm_action)
581 if alarm == 'mcelog':
582 compute_node.run_cmd('sudo modprobe mce-inject')
583 compute_node.run_cmd('sudo ./mce-inject_ea < corrected')
584 if alarm == 'ovs_events':
585 compute_node.run_cmd('sudo ifconfig -a | grep br0')
586 compute_node.run_cmd('sudo ifconfig br0 down; sudo ifconfig br0 up')
    def test_plugins_with_aodh(
            self, compute, plugin_interval, logger,
        # Verify aodh measurements for a plug-in: trigger the alarm, read the
        # alarm's state_timestamp twice around another trigger, and expect the
        # timestamp to advance.
        nodes = get_apex_nodes()
        compute_node = [node for node in nodes if node.get_dict()['name'] == compute][0]
            if node.is_controller():
                self.__logger.info('Getting AODH Alarm list on {}' .format(
                    (node.get_dict()['name'])))
                # copy overcloud credentials so the aodh CLI can be sourced
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                self.trigger_alarm_update(criteria_list, compute_node)
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "aodh alarm list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                    self.__logger.info("aodh alarm list was empty")
                for line in stdout.splitlines():
                    # strip the table borders, first column is the alarm id
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3; aodh alarm show {}' .format(
                        self.__logger.info("aodh alarm list was empty")
                    # first state_timestamp sample
                    for line in stdout.splitlines()[3: -1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'state_timestamp':
                            timestamps1 = line.split()[1]
                    self.trigger_alarm_update(criteria_list, compute_node)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3; aodh alarm show {}" .format(
                        self.__logger.info("aodh alarm list was empty")
                    # second state_timestamp sample, after the new trigger
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'state_timestamp':
                            timestamps2 = line.split()[1]
                    if timestamps1 == timestamps2:
                            "Data not updated after interval of 12 seconds")
                        self.__logger.info("PASS")
    def test_plugins_with_gnocchi(
            self, compute, plugin_interval, logger,
        # Verify gnocchi measurements for a plug-in: sample the newest measure
        # timestamp twice, separated by a sleep longer than the plug-in
        # interval, and expect the timestamp to advance.
        nodes = get_apex_nodes()
        if plugin_interval > 15:
            # wait at least two intervals so a new measure can land
            sleep_time = plugin_interval*2
            if node.is_controller():
                self.__logger.info('Getting gnocchi metric list on {}' .format(
                    (node.get_dict()['name'])))
                # copy overcloud credentials so the gnocchi CLI can be sourced
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "gnocchi metric list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                    self.__logger.info("gnocchi list was empty")
                for line in stdout.splitlines():
                    # strip the table borders, first column is the metric id
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3;gnocchi measures show {}'.format(
                        self.__logger.info("gnocchi list was empty")
                    # first measure timestamp sample
                    for line in stdout.splitlines()[3: -1]:
                            timestamps1 = line.replace('|', "")
                            timestamps1 = timestamps1.split()[0]
                    time.sleep(sleep_time)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3;gnocchi measures show {}".format(
                        self.__logger.info("gnocchi measures was empty")
                    # second measure timestamp sample, after the sleep
                    for line in stdout.splitlines()[3:-1]:
                            timestamps2 = line.replace('|', "")
                            timestamps2 = timestamps2.split()[0]
                    if timestamps1 == timestamps2:
                            "Plugin Interval is {}" .format(plugin_interval))
                            "Data not updated after {} seconds".format(
                        self.__logger.info("PASS")
    def test_plugins_with_snmp(
            self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
            snmp_mib_strings=[], snmp_in_commands=[]):
        """Check that SNMP-exposed counters for the given plug-in change
        over time: snmpwalk three times (with waits in between) and compare
        the sampled counter values.
        """
        # NOTE(review): the list defaults ([]) above are shared across calls
        # (Python mutable-default pitfall); safe only while they are never
        # mutated inside this method -- consider None sentinels.
        if plugin in ('hugepages', 'intel_rdt', 'mcelog'):
            nodes = get_apex_nodes()
                if compute == node.get_dict()['name']:
                    # first sample
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        self.__logger.info("SNMP query failed")
                        counter1 = stdout.split()[3]
                    # second sample
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                            "SNMP query failed during second check")
                        self.__logger.info("waiting for 10 sec")
                    # third sample
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        self.__logger.info("SNMP query failed again")
                        self.__logger.info("Failing this test case")
                        counter2 = stdout.split()[3]
                    # unchanged counter means the plug-in is not updating
                    if counter1 == counter2:
    def check_dma_dummy_included(self, compute, name):
        """Check if dummy collectd config by DMA
        is included in collectd.conf file.

        Keyword arguments:
        compute -- compute node instance
        name -- config file name
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                dummy_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if name + '.conf' not in dummy_conf:
                    self.__logger.error('check conf FAIL')
                    self.__logger.info('check conf PASS')
                    # clean up the dummy config file after a successful check
                    fullpath = '/etc/collectd/collectd.conf.d/{}'.format(
                    self.__logger.info('Delete file {}'.format(fullpath))
                    node.run_cmd('sudo rm -f ' + fullpath)
        self.__logger.error('Some panic, compute not found')
    def create_testvm(self, compute_node, test_name):
        """Create a test VM pinned to *compute_node* via a controller.

        Creates an image, a flavor and a server (all named *test_name*)
        through the OpenStack CLI run on the first controller node, and
        returns the created resource ids in a dict.

        Keyword arguments:
        compute_node -- compute node the VM must be scheduled on
        test_name -- name used for image, flavor and server
        """
        nodes = get_apex_nodes()
        compute_name = compute_node.get_name()
        controller_node = None
            # use the first controller found to run the CLI commands
            if node.is_controller():
                controller_node = node
        self.__logger.debug('Creating Test VM on {}' .format(compute_name))
        self.__logger.debug('Create command is executed in {}' .format(
            (controller_node.get_dict()['name'])))
        # ship credentials and the cirros image to the node
        node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
        node.put_file(TEST_VM_IMAGE_PATH, TEST_VM_IMAGE)
        image = controller_node.run_cmd(
            'source overcloudrc.v3;'
            'openstack image create -f value -c id'
            ' --disk-format qcow2 --file {0} {1}'
            .format(TEST_VM_IMAGE, test_name))
        flavor = controller_node.run_cmd(
            'source overcloudrc.v3;'
            'openstack flavor create -f value -c id {}'
        # resolve the hypervisor hostname of the target compute
        host = controller_node.run_cmd(
            'source overcloudrc.v3;'
            'openstack hypervisor list -f value -c "Hypervisor Hostname"'
            .format(compute_name))
        # pin the server to the compute via the nova availability zone syntax
        server = controller_node.run_cmd(
            'source overcloudrc.v3;'
            'openstack server create -f value -c id'
            ' --image {0} --flavor {1} --availability-zone {2} {3}'
            .format(image, flavor, 'nova:' + host, test_name))
        resources = {"image": image, "flavor": flavor, "server": server}
            self.__logger.debug('VM created')
        self.__logger.debug('VM info: {}'.format(resources))
    def delete_testvm(self, resources):
        """Delete the OpenStack resources created by create_testvm.

        Keyword arguments:
        resources -- dict with 'server', 'flavor' and 'image' ids; missing
                     keys are tolerated and simply skipped
        """
        nodes = get_apex_nodes()
        controller_node = None
            # use the first controller found to run the CLI commands
            if node.is_controller():
                controller_node = node
        # NOTE(review): 'Deleteing' typo below is in a runtime log string,
        # left untouched here.
        self.__logger.debug('Deleteing Test VM')
        self.__logger.debug('VM to be deleted info: {}'.format(resources))
        self.__logger.debug('Delete command is executed in {}' .format(
            (controller_node.get_dict()['name'])))
        server = resources.get('server', None)
        flavor = resources.get('flavor', None)
        image = resources.get('image', None)
            controller_node.run_cmd(
                'source overcloudrc.v3;'
                'openstack server delete {}'.format(server))
            controller_node.run_cmd(
                'source overcloudrc.v3;'
                'openstack flavor delete {}'.format(flavor))
            controller_node.run_cmd(
                'source overcloudrc.v3;'
                'openstack image delete {}'.format(image))
        self.__logger.debug('VM and other OpenStack resources deleted')
    def test_dma_infofetch_get_data(self, compute, test_name):
        """Check that DMA infofetch stored info about the test VM in redis.

        Keyword arguments:
        compute -- compute node instance
        test_name -- VM name expected to appear in the stored vminfo
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
            if compute_name == node.get_dict()['name']:
                # dump every vminfo entry from redis and filter for the VM
                stdout = node.run_cmd(
                    'redis-cli keys "barometer-dma/vm/*/vminfo"'
                    ' | while read k; do redis-cli get $k; done'
                    ' | grep {}'.format(test_name))
                self.__logger.debug('InfoFetch data: {}'.format(stdout))
                if stdout and test_name in stdout:
                    self.__logger.info('PASS')
                    self.__logger.info('No test vm info')
        self.__logger.info('FAIL')