[functest] Update tests for Aodh
[barometer.git] / baro_tests / config_server.py
1 # -*- coding: utf-8 -*-
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 # not use this file except in compliance with the License. You may obtain
5 # a copy of the License at
6 #
7 #      http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 # License for the specific language governing permissions and limitations
13 # under the License.
14
15 """Classes used by collectd.py"""
16
17 import time
18 import os.path
19 import os
20 import re
21
22 from opnfv.deployment import factory
23 import paramiko
24 from functest.utils import constants
25
26 ID_RSA_PATH = '/root/.ssh/id_rsa'
27 SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
28 DEF_PLUGIN_INTERVAL = 10
29 COLLECTD_CONF = '/etc/collectd.conf'
30 COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
31 NOTIFICATION_FILE = '/var/log/python-notifications.dump'
32 COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
33 APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
34 APEX_USER = 'root'
35 APEX_USER_STACK = 'stack'
36 APEX_PKEY = '/root/.ssh/id_rsa'
37
38
class Node(object):
    """Node parsed from one row of `nova list` output.

    attrs is the row split on '|': [leading blank, id, name, status,
    task state, power state, network string such as 'ctlplane=<ip>'].
    """
    def __init__(self, attrs):
        self.__null = attrs[0]
        self.__id = attrs[1]
        self.__name = attrs[2]
        self.__status = attrs[3] if attrs[3] else None
        self.__taskState = attrs[4]
        self.__pwrState = attrs[5]
        # strip the network prefix, e.g. 'ctlplane=10.0.0.1' -> '10.0.0.1'
        self.__ip = re.sub('^[a-z]+=', '', attrs[6])
        # roles are not part of the `nova list` row; previously this was
        # never assigned, so get_roles() raised AttributeError
        self.__roles = None

    def get_name(self):
        """Get node name"""
        return self.__name

    def get_id(self):
        """Get node ID"""
        return self.__id

    def get_ip(self):
        """Get node IP address"""
        return self.__ip

    def get_roles(self):
        """Get node role (None -- not provided by `nova list` output)"""
        return self.__roles
65
66
def get_apex_nodes():
    """Return the node list reported by the Apex deployment handler."""
    deployment_handler = factory.Factory.get_handler(
        'apex', APEX_IP, APEX_USER_STACK, APEX_PKEY)
    return deployment_handler.get_nodes()
74
75
76 class ConfigServer(object):
77     """Class to get env configuration"""
78     def __init__(self, host, user, logger, priv_key=None):
79         self.__host = host
80         self.__user = user
81         self.__passwd = None
82         self.__priv_key = priv_key
83         self.__nodes = list()
84         self.__logger = logger
85
86         self.__private_key_file = ID_RSA_PATH
87         if not os.path.isfile(self.__private_key_file):
88             self.__logger.error(
89                 "Private key file '{}'".format(self.__private_key_file)
90                 + " not found.")
91             raise IOError("Private key file '{}' not found.".format(
92                 self.__private_key_file))
93
94         # get list of available nodes
95         ssh, sftp = self.__open_sftp_session(
96             self.__host, self.__user, self.__passwd)
97         attempt = 1
98         fuel_node_passed = False
99
100         while (attempt <= 10) and not fuel_node_passed:
101             stdin, stdout, stderr = ssh.exec_command(
102                 "source stackrc; nova list")
103             stderr_lines = stderr.readlines()
104             if stderr_lines:
105                 self.__logger.warning(
106                     "'Apex node' command failed (try {}):".format(attempt))
107                 for line in stderr_lines:
108                     self.__logger.debug(line.strip())
109             else:
110                 fuel_node_passed = True
111                 if attempt > 1:
112                     self.__logger.info(
113                         "'Apex node' command passed (try {})".format(attempt))
114             attempt += 1
115         if not fuel_node_passed:
116             self.__logger.error(
117                 "'Apex node' command failed. This was the last try.")
118             raise OSError(
119                 "'Apex node' command failed. This was the last try.")
120         node_table = stdout.readlines()\
121
122         # skip table title and parse table values
123
124         for entry in node_table[3:]:
125             if entry[0] == '+' or entry[0] == '\n':
126                 print entry
127                 pass
128             else:
129                 self.__nodes.append(
130                     Node([str(x.strip(' \n')) for x in entry.split('|')]))
131
132     def get_controllers(self):
133         # Get list of controllers
134         print self.__nodes[0]._Node__ip
135         return (
136             [node for node in self.__nodes if 'controller' in node.get_name()])
137
138     def get_computes(self):
139         # Get list of computes
140         return (
141             [node for node in self.__nodes if 'compute' in node.get_name()])
142
    def get_nodes(self):
        """Return the cached node list built in __init__ (the list object
        itself, not a copy)."""
        return self.__nodes
146
147     def __open_sftp_session(self, host, user, passwd=None):
148         # Connect to given host.
149         """Keyword arguments:
150         host -- host to connect
151         user -- user to use
152         passwd -- password to use
153
154         Return tuple of SSH and SFTP client instances.
155         """
156         # create SSH client
157         ssh = paramiko.SSHClient()
158         ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
159
160         # try a direct access using password or private key
161         if not passwd and not self.__priv_key:
162             # get private key
163             self.__priv_key = paramiko.RSAKey.from_private_key_file(
164                 self.__private_key_file)
165
166         # connect to the server
167         ssh.connect(
168             host, username=user, password=passwd, pkey=self.__priv_key)
169         sftp = ssh.open_sftp()
170
171         # return SFTP client instance
172         return ssh, sftp
173
174     def get_plugin_interval(self, compute, plugin):
175         """Find the plugin interval in collectd configuration.
176
177         Keyword arguments:
178         compute -- compute node instance
179         plugin -- plug-in name
180
181         If found, return interval value, otherwise the default value"""
182         default_interval = DEF_PLUGIN_INTERVAL
183         compute_name = compute.get_name()
184         nodes = get_apex_nodes()
185         for node in nodes:
186             if compute_name == node.get_dict()['name']:
187                 stdout = node.run_cmd(
188                     'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
189                 if stdout is None:
190                     return default_interval
191                 for line in stdout.split('\n'):
192                     if 'Interval' in line:
193                         return 1
194         return default_interval
195
196     def get_plugin_config_values(self, compute, plugin, parameter):
197         """Get parameter values from collectd config file.
198
199         Keyword arguments:
200         compute -- compute node instance
201         plugin -- plug-in name
202         parameter -- plug-in parameter
203
204         Return list of found values."""
205         default_values = []
206         compute_name = compute.get_name()
207         nodes = get_apex_nodes()
208         for node in nodes:
209             if compute_name == node.get_dict()['name']:
210                 stdout = node.run_cmd(
211                     'cat /etc/collectd/collectd.conf.d/{}.conf' .format(plugin))
212                 if stdout is None:
213                     return default_values
214                 for line in stdout.split('\n'):
215                     if 'Interfaces' in line:
216                         return line.split(' ', 1)[1]
217                     elif 'Bridges' in line:
218                         return line.split(' ', 1)[1]
219                     elif 'Cores' in line:
220                         return line.split(' ', 1)[1]
221                     else:
222                         pass
223         return default_values
224
225     def execute_command(self, command, host_ip=None, ssh=None):
226         """Execute command on node and return list of lines of standard output.
227
228         Keyword arguments:
229         command -- command
230         host_ip -- IP of the node
231         ssh -- existing open SSH session to use
232
233         One of host_ip or ssh must not be None. If both are not None,
234         existing ssh session is used.
235         """
236         if host_ip is None and ssh is None:
237             raise ValueError('One of host_ip or ssh must not be None.')
238         if ssh is None:
239             ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
240         stdin, stdout, stderr = ssh.exec_command(command)
241         return stdout.readlines()
242
243     def get_ovs_interfaces(self, compute):
244         """Get list of configured OVS interfaces
245
246         Keyword arguments:
247         compute -- compute node instance
248         """
249         compute_name = compute.get_name()
250         nodes = get_apex_nodes()
251         for node in nodes:
252             if compute_name == node.get_dict()['name']:
253                 stdout = node.run_cmd('sudo ovs-vsctl list-br')
254         return stdout
255
256     def is_gnocchi_running(self, controller):
257         """Check whether Gnocchi is running on controller.
258
259         Keyword arguments:
260         controller -- controller node instance
261
262         Return boolean value whether Gnocchi is running.
263         """
264         gnocchi_present = False
265         controller_name = controller.get_name()
266         nodes = get_apex_nodes()
267         for node in nodes:
268             if controller_name == node.get_dict()['name']:
269                 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
270                 stdout = node.run_cmd(
271                     "source overcloudrc.v3;"
272                     + "openstack catalog list | grep gnocchi")
273                 if stdout is None:
274                     return False
275                 elif 'gnocchi' in stdout:
276                     gnocchi_present = True
277                     return gnocchi_present
278                 else:
279                     return False
280         return gnocchi_present
281
282     def is_aodh_running(self, controller):
283         """Check whether aodh service is running on controller
284         """
285         aodh_present = False
286         controller_name = controller.get_name()
287         nodes = get_apex_nodes()
288         for node in nodes:
289             if controller_name == node.get_dict()['name']:
290                 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
291                 stdout = node.run_cmd(
292                     "source overcloudrc.v3;"
293                     + "openstack catalog list | grep aodh")
294                 if stdout is None:
295                     return False
296                 elif 'aodh' in stdout:
297                     aodh_present = True
298                     return aodh_present
299                 else:
300                     return False
301         return aodh_present
302
303     def is_mcelog_installed(self, compute, package):
304         """Check whether package exists on compute node.
305
306         Keyword arguments:
307         compute -- compute node instance
308         package -- Linux package to search for
309
310         Return boolean value whether package is installed.
311         """
312         compute_name = compute.get_name()
313         nodes = get_apex_nodes()
314         for node in nodes:
315             if compute_name == node.get_dict()['name']:
316                 stdout = node.run_cmd(
317                     'rpm -qa | grep mcelog')
318                 if stdout is None:
319                     return 0
320                 elif 'mcelog' in stdout:
321                     return 1
322                 else:
323                     return 0
324
325     def is_rdt_available(self, compute):
326         """Check whether the compute node is a virtual machine."""
327         compute_name = compute.get_name()
328         nodes = get_apex_nodes()
329         for node in nodes:
330             if compute_name == node.get_dict()['name']:
331                 stdout = node.run_cmd('cat /proc/cpuinfo | grep hypervisor')
332                 if 'hypervisor' in stdout:
333                     return False
334         return True
335
336     def is_libpqos_on_node(self, compute):
337         """Check whether libpqos is present on compute node"""
338
339         compute_name = compute.get_name()
340         nodes = get_apex_nodes()
341         for node in nodes:
342             if compute_name == node.get_dict()['name']:
343                 stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
344                 if 'libpqos' in stdout:
345                     return True
346         return False
347
348     def check_aodh_plugin_included(self, compute):
349         """Check if aodh plugin is included in collectd.conf file.
350         If not, try to enable it.
351
352         Keyword arguments:
353         compute -- compute node instance
354
355         Return boolean value whether AODH plugin is included
356         or it's enabling was successful.
357         """
358         compute_name = compute.get_name()
359         nodes = get_apex_nodes()
360         for node in nodes:
361             if compute_name == node.get_dict()['name']:
362                 aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
363                 if 'aodh.conf' not in aodh_conf:
364                     self.__logger.info(
365                         "AODH Plugin not included in {}".format(compute_name))
366                     return False
367                 else:
368                     self.__logger.info(
369                         "AODH plugin present in compute node {}" .format(
370                             compute_name))
371                     return True
372         return True
373
374     def check_gnocchi_plugin_included(self, compute):
375         """Check if gnocchi plugin is included in collectd.conf file.
376         If not, try to enable it.
377
378         Keyword arguments:
379         compute -- compute node instance
380
381         Return boolean value whether gnocchi plugin is included
382         or it's enabling was successful.
383         """
384         compute_name = compute.get_name()
385         nodes = get_apex_nodes()
386         for node in nodes:
387             if compute_name == node.get_dict()['name']:
388                 gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
389                 if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
390                     self.__logger.info(
391                         "Gnocchi Plugin not included in node {}".format(
392                             compute_name))
393                     return False
394                 else:
395                     self.__logger.info(
396                         "Gnocchi plugin available in compute node {}" .format(
397                             compute_name))
398                     return True
399         return True
400
401     def check_snmp_plugin_included(self, compute):
402         """Check if SNMP plugin is active in compute node.
403         """
404         snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
405         snmp_string = 'INTEL-RDT-MIB::intelRdt'
406         compute_name = compute.get_name()
407         nodes = get_apex_nodes()
408         for node in nodes:
409             if compute_name == node.get_dict()['name']:
410                 stdout = node.run_cmd(
411                     'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
412                         snmp_mib, snmp_string))
413                 self.__logger.info("snmp output = {}" .format(stdout))
414                 if 'OID' in stdout:
415                     return False
416                 else:
417                     return True
418
419     def enable_plugins(
420             self, compute, plugins, error_plugins, create_backup=True):
421         """Enable plugins on compute node
422
423         Keyword arguments:
424         compute -- compute node instance
425         plugins -- list of plugins to be enabled
426
427         Return boolean value indicating whether function was successful.
428         """
429         csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
430         plugins = sorted(plugins)
431         compute_name = compute.get_name()
432         nodes = get_apex_nodes()
433         for node in nodes:
434             if compute_name == node.get_dict()['name']:
435                 node.put_file(csv_file, 'csv.conf')
436                 node.run_cmd(
437                     'sudo cp csv.conf '
438                     + '/etc/collectd/collectd.conf.d/csv.conf')
439         return True
440
441     def restart_collectd(self, compute):
442         """Restart collectd on compute node.
443
444         Keyword arguments:
445         compute -- compute node instance
446
447         Retrun tuple with boolean indicating success and list of warnings
448         received during collectd start.
449         """
450         compute_name = compute.get_name()
451         nodes = get_apex_nodes()
452
453         def get_collectd_processes(compute_node):
454             """Get number of running collectd processes.
455
456             Keyword arguments:
457             ssh_session -- instance of SSH session in which to check
458                 for processes
459             """
460             stdout = compute_node.run_cmd("pgrep collectd")
461             return len(stdout)
462
463         for node in nodes:
464             if compute_name == node.get_dict()['name']:
465                 # node.run_cmd('su; "opnfvapex"')
466                 self.__logger.info('Stopping collectd service...')
467                 node.run_cmd('sudo systemctl stop collectd')
468                 time.sleep(10)
469                 if get_collectd_processes(node):
470                     self.__logger.error('Collectd is still running...')
471                     return False, []
472                 self.__logger.info('Starting collectd service...')
473                 stdout = node.run_cmd('sudo systemctl start collectd')
474                 time.sleep(10)
475                 warning = [
476                     output.strip() for output in stdout if 'WARN: ' in output]
477                 if get_collectd_processes(node) == 0:
478                     self.__logger.error('Collectd is still not running...')
479                     return False, warning
480         return True, warning
481
482     def trigger_alarm_update(self, alarm, compute_node):
483         # TODO: move these actions to main, with criteria lists so that we can reference that
484         # i.e. test_plugin_with_aodh(self, compute, plugin.., logger, criteria_list, alarm_action)
485         if alarm == 'mcelog':
486             compute_node.run_cmd('sudo modprobe mce-inject')
487             compute_node.run_cmd('sudo ./mce-inject_ea < corrected')
488         if alarm == 'ovs_events':
489             compute_node.run_cmd('sudo ifconfig -a | grep br0')
490             compute_node.run_cmd('sudo ifconfig br0 down; sudo ifconfig br0 up')
491
    def test_plugins_with_aodh(
            self, compute, plugin_interval, logger,
            criteria_list=[]):
        """Check that the AODH alarm matching criteria_list updates its state.

        Keyword arguments:
        compute -- name of the compute node the alarm refers to
        plugin_interval -- plug-in collection interval (unused here; the
            wait is a fixed 12 seconds)
        logger -- logger instance (unused; self.__logger is used instead)
        criteria_list -- alarm/plug-in name grepped for in `aodh alarm list`
            and passed to trigger_alarm_update (e.g. 'mcelog', 'ovs_events')

        Return True when the alarm's state_timestamp changed after the alarm
        was re-triggered, False when any query returned no output or the
        timestamp did not change.

        NOTE(review): criteria_list has a mutable default ([]); it is never
        mutated here, but callers should pass it explicitly.
        """

        metric_id = {}
        timestamps1 = {}
        timestamps2 = {}
        nodes = get_apex_nodes()
        # node object for the compute under test (first name match)
        compute_node = [node for node in nodes if node.get_dict()['name'] == compute][0]
        for node in nodes:
            if node.is_controller():
                self.__logger.info('Getting AODH Alarm list on {}' .format(
                    (node.get_dict()['name'])))
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                # fire the fault once so the alarm exists/updates
                self.trigger_alarm_update(criteria_list, compute_node)
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "aodh alarm list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                if stdout is None:
                    self.__logger.info("aodh alarm list was empty")
                    return False
                for line in stdout.splitlines():
                    line = line.replace('|', "")
                    # first column of the list row is the alarm id
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3; aodh alarm show {}' .format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("aodh alarm list was empty")
                        return False
                    # record state_timestamp before re-triggering
                    for line in stdout.splitlines()[3: -1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'state_timestamp':
                            timestamps1 = line.split()[1]
                    self.trigger_alarm_update(criteria_list, compute_node)
                    time.sleep(12)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3; aodh alarm show {}" .format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("aodh alarm list was empty")
                        return False
                    # record state_timestamp after the second trigger
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'state_timestamp':
                            timestamps2 = line.split()[1]
                    if timestamps1 == timestamps2:
                        self.__logger.info(
                            "Data not updated after interval of 12 seconds")
                        return False
                    else:
                        self.__logger.info("PASS")
                        return True
546
547     def test_plugins_with_gnocchi(
548             self, compute, plugin_interval, logger,
549             criteria_list=[]):
550
551         metric_id = {}
552         timestamps1 = {}
553         timestamps2 = {}
554         nodes = get_apex_nodes()
555         if plugin_interval > 15:
556             sleep_time = plugin_interval*2
557         else:
558             sleep_time = 30
559
560         for node in nodes:
561             if node.is_controller():
562                 self.__logger.info('Getting gnocchi metric list on {}' .format(
563                     (node.get_dict()['name'])))
564                 node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
565                 stdout = node.run_cmd(
566                     "source overcloudrc.v3;"
567                     + "gnocchi metric list | grep {0} | grep {1}"
568                     .format(criteria_list, compute))
569                 if stdout is None:
570                         self.__logger.info("gnocchi list was empty")
571                         return False
572                 for line in stdout.splitlines():
573                     line = line.replace('|', "")
574                     metric_id = line.split()[0]
575                     stdout = node.run_cmd(
576                         'source overcloudrc.v3;gnocchi measures show {}'.format(
577                             metric_id))
578                     if stdout is None:
579                         self.__logger.info("gnocchi list was empty")
580                         return False
581                     for line in stdout.splitlines()[3: -1]:
582                         if line[0] == '+':
583                             pass
584                         else:
585                             timestamps1 = line.replace('|', "")
586                             timestamps1 = timestamps1.split()[0]
587                     time.sleep(sleep_time)
588                     stdout = node.run_cmd(
589                         "source overcloudrc.v3;gnocchi measures show {}".format(
590                             metric_id))
591                     if stdout is None:
592                         self.__logger.info("gnocchi measures was empty")
593                         return False
594                     for line in stdout.splitlines()[3:-1]:
595                         if line[0] == '+':
596                             pass
597                         else:
598                             timestamps2 = line.replace('|', "")
599                             timestamps2 = timestamps2.split()[0]
600                     if timestamps1 == timestamps2:
601                         self.__logger.info(
602                             "Plugin Interval is {}" .format(plugin_interval))
603                         self.__logger.info(
604                             "Data not updated after {} seconds".format(
605                                 sleep_time))
606                         return False
607                     else:
608                         self.__logger.info("PASS")
609                         return True
610         return False
611
612     def test_plugins_with_snmp(
613             self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
614             snmp_mib_strings=[], snmp_in_commands=[]):
615
616         if plugin == 'hugepages' or 'intel_rdt' or 'mcelog':
617             nodes = get_apex_nodes()
618             for node in nodes:
619                 if compute == node.get_dict()['name']:
620                     stdout = node.run_cmd(
621                         'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
622                             snmp_mib_files, snmp_mib_strings))
623                     self.__logger.info("{}" .format(stdout))
624                     if stdout is None:
625                         self.__logger.info("No output from snmpwalk")
626                         return False
627                     elif 'OID' in stdout:
628                         self.__logger.info("SNMP query failed")
629                         return False
630                     else:
631                         counter1 = stdout.split()[3]
632                     time.sleep(10)
633                     stdout = node.run_cmd(
634                         'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
635                             snmp_mib_files, snmp_mib_strings))
636                     self.__logger.info("{}" .format(stdout))
637                     if stdout is None:
638                         self.__logger.info("No output from snmpwalk")
639                     elif 'OID' in stdout:
640                         self.__logger.info(
641                             "SNMP query failed during second check")
642                         self.__logger.info("waiting for 10 sec")
643                         time.sleep(10)
644                     stdout = node.run_cmd(
645                         'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
646                             snmp_mib_files, snmp_mib_strings))
647                     self.__logger.info("{}" .format(stdout))
648                     if stdout is None:
649                         self.__logger.info("No output from snmpwalk")
650                     elif 'OID' in stdout:
651                         self.__logger.info("SNMP query failed again")
652                         self.__logger.info("Failing this test case")
653                         return False
654                     else:
655                         counter2 = stdout.split()[3]
656
657                     if counter1 == counter2:
658                         return False
659                     else:
660                         return True
661         else:
662             return False