1 # -*- coding: utf-8 -*-
3 # Copyright(c) 2017-2019 Intel Corporation and OPNFV. All rights reserved.
5 # Licensed under the Apache License, Version 2.0 (the "License"); you may
6 # not use this file except in compliance with the License. You may obtain
7 # a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 # License for the specific language governing permissions and limitations
18 """Classes used by collectd.py"""
26 from opnfv.deployment import factory
28 from functest.utils import constants
30 ID_RSA_PATH = '/root/.ssh/id_rsa'
31 SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
32 DEF_PLUGIN_INTERVAL = 10
33 COLLECTD_CONF = '/etc/collectd.conf'
34 COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
35 NOTIFICATION_FILE = '/var/log/python-notifications.dump'
36 COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
37 APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
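# Note: INSTALLER_IP is expected to be exported by the test environment; if it
# is unset, os.getenv() returns None and the rstrip() call above raises
# AttributeError before any test runs.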
39 APEX_USER_STACK = 'stack'
40 APEX_PKEY = '/root/.ssh/id_rsa'
41 TEST_VM_IMAGE = 'cirros-0.4.0-x86_64-disk.img'
42 TEST_VM_IMAGE_PATH = '/home/opnfv/functest/images/' + TEST_VM_IMAGE
46 """Node configuration class"""
47 def __init__(self, attrs):
48 self.__null = attrs[0]
50 self.__name = attrs[2]
51 self.__status = attrs[3] if attrs[3] else None
52 self.__taskState = attrs[4]
53 self.__pwrState = attrs[5]
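# The Networks column of "nova list" has the form "<network>=<ip>",
# e.g. "ctlplane=192.0.2.10"; strip the "<network>=" prefix so that only
# the IP address is stored.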
54 self.__ip = re.sub('^[a-z]+=', '', attrs[6])
65 """Get node IP address"""
74 handler = factory.Factory.get_handler('apex',
78 nodes = handler.get_nodes()
82 class ConfigServer(object):
83 """Class to get env configuration"""
84 def __init__(self, host, user, logger, priv_key=None):
88 self.__priv_key = priv_key
90 self.__logger = logger
92 self.__private_key_file = ID_RSA_PATH
93 if not os.path.isfile(self.__private_key_file):
95 "Private key file '{}'".format(self.__private_key_file)
97 raise IOError("Private key file '{}' not found.".format(
98 self.__private_key_file))
100 # get list of available nodes
101 ssh, sftp = self.__open_sftp_session(
102 self.__host, self.__user, self.__passwd)
104 fuel_node_passed = False
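# Retry "nova list" on the undercloud up to 10 times; the command can fail
# transiently right after deployment while the OpenStack APIs come up.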
106 while (attempt <= 10) and not fuel_node_passed:
107 stdin, stdout, stderr = ssh.exec_command(
108 "source stackrc; nova list")
109 stderr_lines = stderr.readlines()
111 self.__logger.warning(
112 "'Apex node' command failed (try {}):".format(attempt))
113 for line in stderr_lines:
114 self.__logger.debug(line.strip())
116 fuel_node_passed = True
119 "'Apex node' command passed (try {})".format(attempt))
121 if not fuel_node_passed:
123 "'Apex node' command failed. This was the last try.")
125 "'Apex node' command failed. This was the last try.")
126 node_table = stdout.readlines()\
128 # skip table title and parse table values
130 for entry in node_table[3:]:
131 if entry[0] == '+' or entry[0] == '\n':
136 Node([str(x.strip(' \n')) for x in entry.split('|')]))
138 def get_controllers(self):
139 # Get list of controllers
140 print(self.__nodes[0]._Node__ip)
142 [node for node in self.__nodes if 'controller' in node.get_name()])
144 def get_computes(self):
145 # Get list of computes
147 [node for node in self.__nodes if 'compute' in node.get_name()])
153 def __open_sftp_session(self, host, user, passwd=None):
154 # Connect to given host.
155 """Keyword arguments:
156 host -- host to connect
158 passwd -- password to use
160 Return tuple of SSH and SFTP client instances.
163 ssh = paramiko.SSHClient()
164 ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
166 # try a direct access using password or private key
167 if not passwd and not self.__priv_key:
169 self.__priv_key = paramiko.RSAKey.from_private_key_file(
170 self.__private_key_file)
172 # connect to the server
174 host, username=user, password=passwd, pkey=self.__priv_key)
175 sftp = ssh.open_sftp()
177 # return SFTP client instance
180 def get_plugin_interval(self, compute, plugin):
181 """Find the plugin interval in collectd configuration.
184 compute -- compute node instance
185 plugin -- plug-in name
187 If found, return interval value, otherwise the default value"""
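# A per-plugin config file typically carries a line such as
#   Interval 10
# (globally or inside a <LoadPlugin ...> block); the value found there is
# returned, otherwise DEF_PLUGIN_INTERVAL is used.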
188 default_interval = DEF_PLUGIN_INTERVAL
189 compute_name = compute.get_name()
190 nodes = get_apex_nodes()
192 if compute_name == node.get_dict()['name']:
193 stdout = node.run_cmd(
194 'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
196 return default_interval
197 for line in stdout.split('\n'):
198 if 'Interval' in line:
200 return default_interval
202 def get_plugin_config_values(self, compute, plugin, parameter):
203 """Get parameter values from collectd config file.
206 compute -- compute node instance
207 plugin -- plug-in name
208 parameter -- plug-in parameter
210 Return list of found values."""
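# Example option lines matched below (as used by e.g. the ovs_events,
# ovs_stats and intel_rdt plugin configs):
#   Interfaces "br0"
#   Bridges "br0"
#   Cores "1,2"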
212 compute_name = compute.get_name()
213 nodes = get_apex_nodes()
215 if compute_name == node.get_dict()['name']:
216 stdout = node.run_cmd(
217 'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
219 return default_values
220 for line in stdout.split('\n'):
221 if 'Interfaces' in line:
222 return line.split(' ', 1)[1]
223 elif 'Bridges' in line:
224 return line.split(' ', 1)[1]
225 elif 'Cores' in line:
226 return line.split(' ', 1)[1]
229 return default_values
231 def execute_command(self, command, host_ip=None, ssh=None):
232 """Execute command on node and return list of lines of standard output.
236 host_ip -- IP of the node
237 ssh -- existing open SSH session to use
239 One of host_ip or ssh must not be None. If both are not None,
240 existing ssh session is used.
242 if host_ip is None and ssh is None:
243 raise ValueError('One of host_ip or ssh must not be None.')
245 ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
246 stdin, stdout, stderr = ssh.exec_command(command)
247 return stdout.readlines()
249 def get_ovs_interfaces(self, compute):
250 """Get list of configured OVS interfaces
253 compute -- compute node instance
255 compute_name = compute.get_name()
256 nodes = get_apex_nodes()
258 if compute_name == node.get_dict()['name']:
259 stdout = node.run_cmd('sudo ovs-vsctl list-br')
262 def is_gnocchi_running(self, controller):
263 """Check whether Gnocchi is running on controller.
266 controller -- controller node instance
268 Return boolean value whether Gnocchi is running.
270 gnocchi_present = False
271 controller_name = controller.get_name()
272 nodes = get_apex_nodes()
274 if controller_name == node.get_dict()['name']:
275 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
276 stdout = node.run_cmd(
277 "source overcloudrc.v3;"
278 + "openstack catalog list | grep gnocchi")
281 elif 'gnocchi' in stdout:
282 gnocchi_present = True
283 return gnocchi_present
286 return gnocchi_present
288 def is_aodh_running(self, controller):
289 """Check whether aodh service is running on controller
292 controller_name = controller.get_name()
293 nodes = get_apex_nodes()
295 if controller_name == node.get_dict()['name']:
296 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
297 stdout = node.run_cmd(
298 "source overcloudrc.v3;"
299 + "openstack catalog list | grep aodh")
302 elif 'aodh' in stdout:
309 def is_redis_running(self, compute):
310 """Check whether redis service is running on compute"""
311 compute_name = compute.get_name()
312 nodes = get_apex_nodes()
314 if compute_name == node.get_dict()['name']:
315 stdout = node.run_cmd('sudo systemctl status docker'
317 '| grep barometer-redis')
318 if stdout and 'barometer-redis' in stdout:
320 'Redis is running in node {}'.format(
324 'Redis is *not* running in node {}'.format(
328 def is_dma_server_running(self, compute):
329 """Check whether DMA server is running on compute"""
330 compute_name = compute.get_name()
331 nodes = get_apex_nodes()
333 if compute_name == node.get_dict()['name']:
334 stdout = node.run_cmd('sudo systemctl status docker'
336 '| grep opnfv/barometer-dma')
337 if stdout and '/server' in stdout:
339 'DMA Server is running in node {}'.format(
343 'DMA Server is *not* running in node {}'.format(
347 def is_dma_infofetch_running(self, compute):
348 """Check whether DMA infofetch is running on compute"""
349 compute_name = compute.get_name()
350 nodes = get_apex_nodes()
352 if compute_name == node.get_dict()['name']:
353 stdout = node.run_cmd('sudo systemctl status docker'
355 '| grep opnfv/barometer-dma')
356 if stdout and '/infofetch' in stdout:
358 'DMA InfoFetch is running in node {}'.format(
362 'DMA InfoFetch is *not* running in node {}'.format(
366 def get_dma_config(self, compute):
367 """Get config values of DMA"""
368 compute_name = compute.get_name()
369 nodes = get_apex_nodes()
371 if compute_name == node.get_dict()['name']:
372 # We will use the following once functest accepts python-toml:
373 # stdout = node.run_cmd(
374 # 'cat /etc/barometer-dma/config.toml')
376 # agent_conf = toml.loads(stdout)
377 # except (TypeError, TomlDecodeError) as e:
378 # self.__logger.error(
379 # 'DMA config error: {}'.format(e))
384 'egrep "listen_port|amqp_"'
385 ' /etc/barometer-dma/config.toml'
386 '| sed -e "s/#.*$//" | sed -e "s/=/:/"'
388 stdout = node.run_cmd(readcmd)
389 agent_conf = {"server": yaml.safe_load(stdout)}
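# The configured amqp_host may be a hostname; resolve it to a plain IP
# address by extracting it from a single ping reply
# ("... bytes from <ip>: ...").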
392 'ping -n -c1 ' + agent_conf["server"]["amqp_host"] +
393 '| sed -ne "s/^.*bytes from //p" | sed -e "s/:.*//"'
395 agent_conf["server"]["amqp_host"] = node.run_cmd(pingcmd)
400 def is_mcelog_installed(self, compute, package):
401 """Check whether package exists on compute node.
404 compute -- compute node instance
405 package -- Linux package to search for
407 Return boolean value whether package is installed.
409 compute_name = compute.get_name()
410 nodes = get_apex_nodes()
412 if compute_name == node.get_dict()['name']:
413 stdout = node.run_cmd(
414 'rpm -qa | grep mcelog')
417 elif 'mcelog' in stdout:
422 def is_rdt_available(self, compute):
423 """Check whether the compute node is a virtual machine."""
424 compute_name = compute.get_name()
425 nodes = get_apex_nodes()
427 if compute_name == node.get_dict()['name']:
428 stdout = node.run_cmd('cat /proc/cpuinfo | grep hypervisor')
429 if 'hypervisor' in stdout:
433 def is_libpqos_on_node(self, compute):
434 """Check whether libpqos is present on compute node"""
436 compute_name = compute.get_name()
437 nodes = get_apex_nodes()
439 if compute_name == node.get_dict()['name']:
440 stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
441 if 'libpqos' in stdout:
445 def check_aodh_plugin_included(self, compute):
446 """Check if aodh plugin is included in collectd.conf file.
447 If not, try to enable it.
450 compute -- compute node instance
452 Return boolean value whether AODH plugin is included
453 or its enabling was successful.
455 compute_name = compute.get_name()
456 nodes = get_apex_nodes()
458 if compute_name == node.get_dict()['name']:
459 aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
460 if 'aodh.conf' not in aodh_conf:
462 "AODH Plugin not included in {}".format(compute_name))
466 "AODH plugin present in compute node {}" .format(
471 def check_gnocchi_plugin_included(self, compute):
472 """Check if gnocchi plugin is included in collectd.conf file.
473 If not, try to enable it.
476 compute -- compute node instance
478 Return boolean value whether gnocchi plugin is included
479 or its enabling was successful.
481 compute_name = compute.get_name()
482 nodes = get_apex_nodes()
484 if compute_name == node.get_dict()['name']:
485 gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
486 if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
488 "Gnocchi Plugin not included in node {}".format(
493 "Gnocchi plugin available in compute node {}" .format(
498 def check_snmp_plugin_included(self, compute):
499 """Check if SNMP plugin is active in compute node.
501 snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
502 snmp_string = 'INTEL-RDT-MIB::intelRdt'
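# The compute node's snmp_agent plugin is expected to export intel_rdt
# metrics under the INTEL-RDT-MIB subtree; a non-empty walk of that subtree
# confirms the SNMP plugin is active.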
503 compute_name = compute.get_name()
504 nodes = get_apex_nodes()
506 if compute_name == node.get_dict()['name']:
507 stdout = node.run_cmd(
508 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
509 snmp_mib, snmp_string))
510 self.__logger.info("snmp output = {}" .format(stdout))
517 self, compute, plugins, error_plugins, create_backup=True):
518 """Enable plugins on compute node
521 compute -- compute node instance
522 plugins -- list of plugins to be enabled
524 Return boolean value indicating whether function was successful.
526 csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
527 plugins = sorted(plugins)
528 compute_name = compute.get_name()
529 nodes = get_apex_nodes()
531 if compute_name == node.get_dict()['name']:
532 node.put_file(csv_file, 'csv.conf')
535 + '/etc/collectd/collectd.conf.d/csv.conf')
538 def restart_collectd(self, compute):
539 """Restart collectd on compute node.
542 compute -- compute node instance
544 Return tuple with boolean indicating success and list of warnings
545 received during collectd start.
547 compute_name = compute.get_name()
548 nodes = get_apex_nodes()
550 def get_collectd_processes(compute_node):
551 """Get number of running collectd processes.
554 compute_node -- compute node instance in which to check
557 stdout = compute_node.run_cmd("pgrep collectd")
561 if compute_name == node.get_dict()['name']:
562 # node.run_cmd('su; "opnfvapex"')
563 self.__logger.info('Stopping collectd service...')
564 node.run_cmd('sudo systemctl stop collectd')
566 if get_collectd_processes(node):
567 self.__logger.error('Collectd is still running...')
569 self.__logger.info('Starting collectd service...')
570 stdout = node.run_cmd('sudo systemctl start collectd')
573 output.strip() for output in stdout if 'WARN: ' in output]
574 if get_collectd_processes(node) == 0:
575 self.__logger.error('Collectd is still not running...')
576 return False, warning
579 def trigger_alarm_update(self, alarm, compute_node):
580 # TODO: move these actions to main, with criteria lists, so they can be passed in,
581 # e.g. test_plugin_with_aodh(self, compute, plugin.., logger, criteria_list, alarm_action)
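# mcelog: inject a corrected memory error so the mcelog plugin raises a
# notification; ovs_events: toggle br0 so a link-status event is generated.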
582 if alarm == 'mcelog':
583 compute_node.run_cmd('sudo modprobe mce-inject')
584 compute_node.run_cmd('sudo ./mce-inject_ea < corrected')
585 if alarm == 'ovs_events':
586 compute_node.run_cmd('sudo ifconfig -a | grep br0')
587 compute_node.run_cmd('sudo ifconfig br0 down; sudo ifconfig br0 up')
589 def test_plugins_with_aodh(
590 self, compute, plugin_interval, logger,
596 nodes = get_apex_nodes()
597 compute_node = [node for node in nodes if node.get_dict()['name'] == compute][0]
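# The alarm is triggered, its state_timestamp recorded, then triggered
# again; the test passes only when the timestamp changes between the two
# "aodh alarm show" calls.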
599 if node.is_controller():
600 self.__logger.info('Getting AODH Alarm list on {}' .format(
601 (node.get_dict()['name'])))
602 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
603 self.trigger_alarm_update(criteria_list, compute_node)
604 stdout = node.run_cmd(
605 "source overcloudrc.v3;"
606 + "aodh alarm list | grep {0} | grep {1}"
607 .format(criteria_list, compute))
609 self.__logger.info("aodh alarm list was empty")
611 for line in stdout.splitlines():
612 line = line.replace('|', "")
613 metric_id = line.split()[0]
614 stdout = node.run_cmd(
615 'source overcloudrc.v3; aodh alarm show {}' .format(
618 self.__logger.info("aodh alarm list was empty")
620 for line in stdout.splitlines()[3: -1]:
621 line = line.replace('|', "")
622 if line.split()[0] == 'state_timestamp':
623 timestamps1 = line.split()[1]
624 self.trigger_alarm_update(criteria_list, compute_node)
626 stdout = node.run_cmd(
627 "source overcloudrc.v3; aodh alarm show {}" .format(
630 self.__logger.info("aodh alarm list was empty")
632 for line in stdout.splitlines()[3:-1]:
633 line = line.replace('|', "")
634 if line.split()[0] == 'state_timestamp':
635 timestamps2 = line.split()[1]
636 if timestamps1 == timestamps2:
638 "Data not updated after interval of 12 seconds")
641 self.__logger.info("PASS")
644 def test_plugins_with_gnocchi(
645 self, compute, plugin_interval, logger,
651 nodes = get_apex_nodes()
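# Two gnocchi measures are compared one sleep_time apart; sleep_time is
# scaled with the plugin interval so at least one new sample can be
# collected and pushed in between.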
652 if plugin_interval > 15:
653 sleep_time = plugin_interval*2
658 if node.is_controller():
659 self.__logger.info('Getting gnocchi metric list on {}' .format(
660 (node.get_dict()['name'])))
661 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
662 stdout = node.run_cmd(
663 "source overcloudrc.v3;"
664 + "gnocchi metric list | grep {0} | grep {1}"
665 .format(criteria_list, compute))
667 self.__logger.info("gnocchi list was empty")
669 for line in stdout.splitlines():
670 line = line.replace('|', "")
671 metric_id = line.split()[0]
672 stdout = node.run_cmd(
673 'source overcloudrc.v3;gnocchi measures show {}'.format(
676 self.__logger.info("gnocchi list was empty")
678 for line in stdout.splitlines()[3: -1]:
682 timestamps1 = line.replace('|', "")
683 timestamps1 = timestamps1.split()[0]
684 time.sleep(sleep_time)
685 stdout = node.run_cmd(
686 "source overcloudrc.v3;gnocchi measures show {}".format(
689 self.__logger.info("gnocchi measures was empty")
691 for line in stdout.splitlines()[3:-1]:
695 timestamps2 = line.replace('|', "")
696 timestamps2 = timestamps2.split()[0]
697 if timestamps1 == timestamps2:
699 "Plugin Interval is {}" .format(plugin_interval))
701 "Data not updated after {} seconds".format(
705 self.__logger.info("PASS")
709 def test_plugins_with_snmp(
710 self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
711 snmp_mib_strings=[], snmp_in_commands=[]):
713 if plugin in ('hugepages', 'intel_rdt', 'mcelog'):
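# These plugins expose numeric OID values that should keep changing; the
# same counter read twice (with triggers and a 10 s wait in between) means
# the plugin is not feeding the SNMP agent and the test fails.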
714 nodes = get_apex_nodes()
716 if compute == node.get_dict()['name']:
717 stdout = node.run_cmd(
718 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
719 snmp_mib_files, snmp_mib_strings))
720 self.__logger.info("{}" .format(stdout))
722 self.__logger.info("No output from snmpwalk")
724 elif 'OID' in stdout:
725 self.__logger.info("SNMP query failed")
728 counter1 = stdout.split()[3]
730 stdout = node.run_cmd(
731 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
732 snmp_mib_files, snmp_mib_strings))
733 self.__logger.info("{}" .format(stdout))
735 self.__logger.info("No output from snmpwalk")
736 elif 'OID' in stdout:
738 "SNMP query failed during second check")
739 self.__logger.info("waiting for 10 sec")
741 stdout = node.run_cmd(
742 'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
743 snmp_mib_files, snmp_mib_strings))
744 self.__logger.info("{}" .format(stdout))
746 self.__logger.info("No output from snmpwalk")
747 elif 'OID' in stdout:
748 self.__logger.info("SNMP query failed again")
749 self.__logger.info("Failing this test case")
752 counter2 = stdout.split()[3]
754 if counter1 == counter2:
761 def check_dma_dummy_included(self, compute, name):
762 """Check if dummy collectd config by DMA
763 is included in collectd.conf file.
766 compute -- compute node instance
767 name -- config file name
769 compute_name = compute.get_name()
770 nodes = get_apex_nodes()
772 if compute_name == node.get_dict()['name']:
773 dummy_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
774 if name + '.conf' not in dummy_conf:
775 self.__logger.error('check conf FAIL')
778 self.__logger.info('check conf PASS')
779 fullpath = '/etc/collectd/collectd.conf.d/{}'.format(
781 self.__logger.info('Delete file {}'.format(fullpath))
782 node.run_cmd('sudo rm -f ' + fullpath)
784 self.__logger.error('Compute node not found')
787 def create_testvm(self, compute_node, test_name):
788 nodes = get_apex_nodes()
789 compute_name = compute_node.get_name()
791 controller_node = None
793 if node.is_controller():
794 controller_node = node
797 self.__logger.debug('Creating Test VM on {}' .format(compute_name))
798 self.__logger.debug('Create command is executed in {}' .format(
799 (controller_node.get_dict()['name'])))
801 controller_node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
802 controller_node.put_file(TEST_VM_IMAGE_PATH, TEST_VM_IMAGE)
803 image = controller_node.run_cmd(
804 'source overcloudrc.v3;'
805 'openstack image create -f value -c id'
806 ' --disk-format qcow2 --file {0} {1}'
807 .format(TEST_VM_IMAGE, test_name))
808 flavor = controller_node.run_cmd(
809 'source overcloudrc.v3;'
810 'openstack flavor create -f value -c id {}'
812 host = controller_node.run_cmd(
813 'source overcloudrc.v3;'
814 'openstack hypervisor list -f value -c "Hypervisor Hostname"'
816 .format(compute_name))
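# Passing "nova:<hypervisor hostname>" as the availability zone forces the
# scheduler to place the test server on the selected compute node.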
817 server = controller_node.run_cmd(
818 'source overcloudrc.v3;'
819 'openstack server create -f value -c id'
820 ' --image {0} --flavor {1} --availability-zone {2} {3}'
821 .format(image, flavor, 'nova:' + host, test_name))
823 resources = {"image": image, "flavor": flavor, "server": server}
826 self.__logger.debug('VM created')
827 self.__logger.debug('VM info: {}'.format(resources))
831 def delete_testvm(self, resources):
832 nodes = get_apex_nodes()
834 controller_node = None
836 if node.is_controller():
837 controller_node = node
840 self.__logger.debug('Deleting Test VM')
841 self.__logger.debug('VM to be deleted info: {}'.format(resources))
842 self.__logger.debug('Delete command is executed in {}' .format(
843 (controller_node.get_dict()['name'])))
845 server = resources.get('server', None)
846 flavor = resources.get('flavor', None)
847 image = resources.get('image', None)
849 controller_node.run_cmd(
850 'source overcloudrc.v3;'
851 'openstack server delete {}'.format(server))
853 controller_node.run_cmd(
854 'source overcloudrc.v3;'
855 'openstack flavor delete {}'.format(flavor))
857 controller_node.run_cmd(
858 'source overcloudrc.v3;'
859 'openstack image delete {}'.format(image))
861 self.__logger.debug('VM and other OpenStack resources deleted')
863 def test_dma_infofetch_get_data(self, compute, test_name):
864 compute_name = compute.get_name()
865 nodes = get_apex_nodes()
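# The DMA InfoFetch container stores one "barometer-dma/vm/<uuid>/vminfo"
# record per VM in the local redis; finding the test VM's name in one of
# those records shows its metadata was collected.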
867 if compute_name == node.get_dict()['name']:
868 stdout = node.run_cmd(
869 'redis-cli keys "barometer-dma/vm/*/vminfo"'
870 ' | while read k; do redis-cli get $k; done'
871 ' | grep {}'.format(test_name))
872 self.__logger.debug('InfoFetch data: {}'.format(stdout))
873 if stdout and test_name in stdout:
874 self.__logger.info('PASS')
877 self.__logger.info('No test vm info')
879 self.__logger.info('FAIL')