Merge "Switch from openstack.creds to the new env_file"
[barometer.git] / baro_tests / config_server.py
1 # -*- coding: utf-8 -*-
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License"); you may
4 # not use this file except in compliance with the License. You may obtain
5 # a copy of the License at
6 #
7 #      http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 # License for the specific language governing permissions and limitations
13 # under the License.
14
15 """Classes used by collectd.py"""
16
17 import time
18 import os.path
19 import os
20 import re
21
22 from opnfv.deployment import factory
23 import paramiko
24 from functest.utils import constants
25
26 ID_RSA_PATH = '/root/.ssh/id_rsa'
27 SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
28 DEF_PLUGIN_INTERVAL = 10
29 COLLECTD_CONF = '/etc/collectd.conf'
30 COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
31 NOTIFICATION_FILE = '/var/log/python-notifications.dump'
32 COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
33 APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
34 APEX_USER = 'root'
35 APEX_USER_STACK = 'stack'
36 APEX_PKEY = '/root/.ssh/id_rsa'
37
38
class Node(object):
    """Node configuration class.

    Wraps one row of the undercloud 'nova list' table, already split
    on '|' by ConfigServer.
    """
    def __init__(self, attrs):
        """Store node fields.

        Keyword arguments:
        attrs -- list of row cells: [empty, id, name, status, task state,
            power state, 'network=ip' address]
        """
        self.__null = attrs[0]
        self.__id = attrs[1]
        self.__name = attrs[2]
        self.__status = attrs[3] if attrs[3] else None
        self.__taskState = attrs[4]
        self.__pwrState = attrs[5]
        # strip the leading 'network=' prefix, keeping only the IP
        self.__ip = re.sub('^[a-z]+=', '', attrs[6])
        # BUG FIX: 'nova list' output has no role column, yet get_roles()
        # read self.__roles, which was never assigned and always raised
        # AttributeError.  Initialize it to None so the getter is safe.
        self.__roles = None

    def get_name(self):
        """Get node name"""
        return self.__name

    def get_id(self):
        """Get node ID"""
        return self.__id

    def get_ip(self):
        """Get node IP address"""
        return self.__ip

    def get_roles(self):
        """Get node role (None: not available from 'nova list' output)"""
        return self.__roles
65
66
def get_apex_nodes():
    """Return the overcloud nodes known to the Apex deployment handler."""
    deployment_handler = factory.Factory.get_handler(
        'apex', APEX_IP, APEX_USER_STACK, APEX_PKEY)
    return deployment_handler.get_nodes()
74
75
class ConfigServer(object):
    """Class to get env configuration.

    Connects to the undercloud over SSH, reads the 'nova list' node
    table and offers helpers to inspect/drive collectd, Gnocchi, AODH
    and SNMP on the overcloud nodes (via get_apex_nodes()).
    """

    def __init__(self, host, user, logger, priv_key=None):
        """Open an SSH session to the undercloud and parse the node list.

        Keyword arguments:
        host -- undercloud host to connect to
        user -- SSH user name
        logger -- logger instance used for all reporting
        priv_key -- optional paramiko private key to authenticate with

        Raises IOError when the private key file is missing and OSError
        when the node listing keeps failing after 10 attempts.
        """
        self.__host = host
        self.__user = user
        self.__passwd = None
        self.__priv_key = priv_key
        self.__nodes = list()
        self.__logger = logger

        self.__private_key_file = ID_RSA_PATH
        if not os.path.isfile(self.__private_key_file):
            self.__logger.error(
                "Private key file '{}'".format(self.__private_key_file)
                + " not found.")
            raise IOError("Private key file '{}' not found.".format(
                self.__private_key_file))

        # get list of available nodes
        ssh, sftp = self.__open_sftp_session(
            self.__host, self.__user, self.__passwd)
        attempt = 1
        fuel_node_passed = False

        # 'nova list' can fail transiently right after deployment,
        # so retry up to 10 times before giving up.
        while (attempt <= 10) and not fuel_node_passed:
            stdin, stdout, stderr = ssh.exec_command(
                "source stackrc; nova list")
            stderr_lines = stderr.readlines()
            if stderr_lines:
                self.__logger.warning(
                    "'Apex node' command failed (try {}):".format(attempt))
                for line in stderr_lines:
                    self.__logger.debug(line.strip())
            else:
                fuel_node_passed = True
                if attempt > 1:
                    self.__logger.info(
                        "'Apex node' command passed (try {})".format(attempt))
            attempt += 1
        if not fuel_node_passed:
            self.__logger.error(
                "'Apex node' command failed. This was the last try.")
            raise OSError(
                "'Apex node' command failed. This was the last try.")
        # BUG FIX: dropped a stray trailing backslash line-continuation
        node_table = stdout.readlines()

        # skip table title and parse table values
        for entry in node_table[3:]:
            if entry[0] == '+' or entry[0] == '\n':
                # table frame or blank line - nothing to parse
                # (was a bare Python 2 'print entry' debug statement)
                self.__logger.debug(entry)
            else:
                self.__nodes.append(
                    Node([str(x.strip(' \n')) for x in entry.split('|')]))

    def get_controllers(self):
        """Get list of controller nodes (name contains 'controller').

        BUG FIX: removed a Python 2 debug print of
        self.__nodes[0]._Node__ip, which also crashed with IndexError
        whenever the node list was empty.
        """
        return (
            [node for node in self.__nodes if 'controller' in node.get_name()])

    def get_computes(self):
        """Get list of compute nodes (name contains 'compute')."""
        return (
            [node for node in self.__nodes if 'compute' in node.get_name()])

    def get_nodes(self):
        """Get list of all parsed nodes."""
        return self.__nodes

    def __open_sftp_session(self, host, user, passwd=None):
        """Connect to given host.

        Keyword arguments:
        host -- host to connect
        user -- user to use
        passwd -- password to use

        Return tuple of SSH and SFTP client instances.
        """
        # create SSH client
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # try a direct access using password or private key
        if not passwd and not self.__priv_key:
            # get private key
            self.__priv_key = paramiko.RSAKey.from_private_key_file(
                self.__private_key_file)

        # connect to the server
        ssh.connect(
            host, username=user, password=passwd, pkey=self.__priv_key)
        sftp = ssh.open_sftp()

        # return SFTP client instance
        return ssh, sftp

    def get_plugin_interval(self, compute, plugin):
        """Find the plugin interval in collectd configuration.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name

        If found, return interval value, otherwise the default value"""
        default_interval = DEF_PLUGIN_INTERVAL
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                if stdout is None:
                    return default_interval
                for line in stdout.split('\n'):
                    if 'Interval' in line:
                        # BUG FIX: return the configured value as the
                        # docstring promises (was a hard-coded 1).
                        try:
                            return int(line.split()[1].strip('"'))
                        except (IndexError, ValueError):
                            return default_interval
        return default_interval

    def get_plugin_config_values(self, compute, plugin, parameter):
        """Get parameter values from collectd config file.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name
        parameter -- plug-in parameter

        Return list of found values."""
        default_values = []
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                if stdout is None:
                    return default_values
                # NOTE: returns the remainder of the first matching line
                # (a string), not a list; callers rely on this.
                for line in stdout.split('\n'):
                    if 'Interfaces' in line:
                        return line.split(' ', 1)[1]
                    elif 'Bridges' in line:
                        return line.split(' ', 1)[1]
                    elif 'Cores' in line:
                        return line.split(' ', 1)[1]
                    else:
                        pass
        return default_values

    def execute_command(self, command, host_ip=None, ssh=None):
        """Execute command on node and return list of lines of standard output.

        Keyword arguments:
        command -- command
        host_ip -- IP of the node
        ssh -- existing open SSH session to use

        One of host_ip or ssh must not be None. If both are not None,
        existing ssh session is used.
        """
        if host_ip is None and ssh is None:
            raise ValueError('One of host_ip or ssh must not be None.')
        if ssh is None:
            ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
        stdin, stdout, stderr = ssh.exec_command(command)
        return stdout.readlines()

    def get_ovs_interfaces(self, compute):
        """Get list of configured OVS interfaces

        Keyword arguments:
        compute -- compute node instance
        """
        # BUG FIX: stdout was unbound (NameError) when no node matched;
        # now None is returned in that case.
        stdout = None
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd('sudo ovs-vsctl list-br')
        return stdout

    def is_gnocchi_running(self, controller):
        """Check whether Gnocchi is running on controller.

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether Gnocchi is running.
        """
        gnocchi_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if controller_name == node.get_dict()['name']:
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep gnocchi")
                if stdout is None:
                    return False
                elif 'gnocchi' in stdout:
                    gnocchi_present = True
                    return gnocchi_present
                else:
                    return False
        return gnocchi_present

    def is_aodh_running(self, controller):
        """Check whether aodh service is running on controller.

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether aodh is running.
        """
        aodh_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if controller_name == node.get_dict()['name']:
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep aodh")
                if stdout is None:
                    return False
                elif 'aodh' in stdout:
                    aodh_present = True
                    return aodh_present
                else:
                    return False
        return aodh_present

    def is_mcelog_installed(self, compute, package):
        """Check whether package exists on compute node.

        Keyword arguments:
        compute -- compute node instance
        package -- Linux package to search for

        Return boolean value whether package is installed.
        """
        # NOTE: returns 0/1 (truthy ints) rather than True/False;
        # kept for backward compatibility with existing callers.
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'rpm -qa | grep mcelog')
                if stdout is None:
                    return 0
                elif 'mcelog' in stdout:
                    return 1
                else:
                    return 0

    def is_libpqos_on_node(self, compute):
        """Check whether libpqos is present on compute node"""

        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
                if 'libpqos' in stdout:
                    return True
        return False

    def check_aodh_plugin_included(self, compute):
        """Check if aodh plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether AODH plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'aodh.conf' not in aodh_conf:
                    self.__logger.info(
                        "AODH Plugin not included in {}".format(compute_name))
                    return False
                else:
                    self.__logger.info(
                        "AODH plugin present in compute node {}" .format(
                            compute_name))
                    return True
        return True

    def check_gnocchi_plugin_included(self, compute):
        """Check if gnocchi plugin is included in collectd.conf file.
        If not, try to enable it.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value whether gnocchi plugin is included
        or it's enabling was successful.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
                    self.__logger.info(
                        "Gnocchi Plugin not included in node {}".format(
                            compute_name))
                    return False
                else:
                    self.__logger.info(
                        "Gnocchi plugin available in compute node {}" .format(
                            compute_name))
                    return True
        return True

    def check_snmp_plugin_included(self, compute):
        """Check if SNMP plugin is active in compute node.

        Keyword arguments:
        compute -- compute node instance

        Return True when the snmpwalk query succeeds, False otherwise;
        None when the compute node is not found.
        """
        snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
        snmp_string = 'INTEL-RDT-MIB::intelRdt'
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                        snmp_mib, snmp_string))
                self.__logger.info("snmp output = {}" .format(stdout))
                # BUG FIX: run_cmd may return None, which made
                # "'OID' in stdout" raise TypeError.
                if stdout is None or 'OID' in stdout:
                    return False
                else:
                    return True

    def enable_plugins(
            self, compute, plugins, error_plugins, create_backup=True):
        """Enable plugins on compute node

        Keyword arguments:
        compute -- compute node instance
        plugins -- list of plugins to be enabled

        Return boolean value indicating whether function was successful.
        """
        # NOTE: currently only the csv plugin config is deployed;
        # 'plugins', 'error_plugins' and 'create_backup' are kept for
        # interface compatibility with callers.
        csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
        plugins = sorted(plugins)
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                node.put_file(csv_file, 'csv.conf')
                node.run_cmd(
                    'sudo cp csv.conf '
                    + '/etc/collectd/collectd.conf.d/csv.conf')
        return True

    def restart_collectd(self, compute):
        """Restart collectd on compute node.

        Keyword arguments:
        compute -- compute node instance

        Return tuple with boolean indicating success and list of warnings
        received during collectd start.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()

        def get_collectd_processes(compute_node):
            """Get number of running collectd processes.

            Keyword arguments:
            compute_node -- node on which to check for processes
            """
            stdout = compute_node.run_cmd("pgrep collectd")
            return len(stdout)

        # BUG FIX: 'warning' was unbound (NameError on the final return)
        # when no node matched compute_name.
        warning = []
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                self.__logger.info('Stopping collectd service...')
                node.run_cmd('sudo systemctl stop collectd')
                time.sleep(10)
                if get_collectd_processes(node):
                    self.__logger.error('Collectd is still running...')
                    return False, []
                self.__logger.info('Starting collectd service...')
                stdout = node.run_cmd('sudo systemctl start collectd')
                time.sleep(10)
                warning = [
                    output.strip() for output in stdout if 'WARN: ' in output]
                if get_collectd_processes(node) == 0:
                    self.__logger.error('Collectd is still not running...')
                    return False, warning
        return True, warning

    def test_plugins_with_aodh(
            self, compute, plugin_interval, logger,
            criteria_list=[]):
        """Check that AODH alarm data is refreshed for the given criteria.

        Keyword arguments:
        compute -- compute node name to grep for
        plugin_interval -- plug-in collection interval (unused here;
            a fixed 12 s wait is used)
        logger -- unused, kept for interface compatibility
        criteria_list -- alarm criteria to grep for

        Return True when the alarm timestamp changed after the wait,
        False on empty output or stale data; None when no controller
        node is found.
        """
        metric_id = {}
        timestamps1 = {}
        timestamps2 = {}
        nodes = get_apex_nodes()
        for node in nodes:
            if node.is_controller():
                self.__logger.info('Getting AODH Alarm list on {}' .format(
                    (node.get_dict()['name'])))
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "aodh alarm list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                if stdout is None:
                    self.__logger.info("aodh alarm list was empty")
                    return False
                for line in stdout.splitlines():
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3; aodh alarm show {}' .format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("aodh alarm list was empty")
                        return False
                    for line in stdout.splitlines()[3: -1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps1 = line.split()[1]
                        else:
                            pass
                    time.sleep(12)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3; aodh alarm show {}" .format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("aodh alarm list was empty")
                        return False
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps2 = line.split()[1]
                        else:
                            pass
                    if timestamps1 == timestamps2:
                        self.__logger.info(
                            "Data not updated after interval of 12 seconds")
                        return False
                    else:
                        self.__logger.info("PASS")
                        return True

    def test_plugins_with_gnocchi(
            self, compute, plugin_interval, logger,
            criteria_list=[]):
        """Check that Gnocchi measures are refreshed for the given criteria.

        Keyword arguments:
        compute -- compute node name to grep for
        plugin_interval -- plug-in collection interval; waited interval + 2 s
        logger -- unused, kept for interface compatibility
        criteria_list -- metric criteria to grep for

        Return True when the measure timestamp changed after the wait,
        False otherwise.
        """
        metric_id = {}
        timestamps1 = {}
        timestamps2 = {}
        nodes = get_apex_nodes()
        sleep_time = plugin_interval + 2
        for node in nodes:
            if node.is_controller():
                self.__logger.info('Getting gnocchi metric list on {}' .format(
                    (node.get_dict()['name'])))
                node.put_file(constants.ENV_FILE, 'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "gnocchi metric list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                # (fixed over-indented block from the original)
                if stdout is None:
                    self.__logger.info("gnocchi list was empty")
                    return False
                for line in stdout.splitlines():
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3;gnocchi measures show {}'.format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("gnocchi list was empty")
                        return False
                    for line in stdout.splitlines()[3: -1]:
                        if line[0] == '+':
                            pass
                        else:
                            timestamps1 = line.replace('|', "")
                            timestamps1 = timestamps1.split()[0]
                    time.sleep(sleep_time)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3;gnocchi measures show {}".format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("gnocchi measures was empty")
                        return False
                    for line in stdout.splitlines()[3:-1]:
                        if line[0] == '+':
                            pass
                        else:
                            timestamps2 = line.replace('|', "")
                            timestamps2 = timestamps2.split()[0]
                    if timestamps1 == timestamps2:
                        self.__logger.info(
                            "Plugin Interval is {}" .format(plugin_interval))
                        self.__logger.info(
                            "Data not updated after {} seconds".format(
                                sleep_time))
                        return False
                    else:
                        self.__logger.info("PASS")
                        return True
        return False

    def test_plugins_with_snmp(
            self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
            snmp_mib_strings=[], snmp_in_commands=[]):
        """Check that SNMP counters for a plugin are published and changing.

        Keyword arguments:
        compute -- compute node name
        plugin_interval -- plug-in collection interval (unused; fixed waits)
        logger -- unused, kept for interface compatibility
        plugin -- plug-in name; only hugepages/intel_rdt/mcelog supported
        snmp_mib_files -- MIB file path passed to snmpwalk
        snmp_mib_strings -- MIB object to query
        snmp_in_commands -- unused, kept for interface compatibility

        Return True when the counter changed between two queries,
        False otherwise.
        """
        # BUG FIX: the original condition was
        # "plugin == 'hugepages' or 'intel_rdt' or 'mcelog'", which is
        # always true (non-empty strings are truthy), making the else
        # branch unreachable.  Test membership instead.
        if plugin in ('hugepages', 'intel_rdt', 'mcelog'):
            nodes = get_apex_nodes()
            for node in nodes:
                if compute == node.get_dict()['name']:
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                    if stdout is None:
                        self.__logger.info("No output from snmpwalk")
                        return False
                    elif 'OID' in stdout:
                        self.__logger.info("SNMP query failed")
                        return False
                    else:
                        counter1 = stdout.split()[3]
                    time.sleep(10)
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                    if stdout is None:
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        self.__logger.info(
                            "SNMP query failed during second check")
                        self.__logger.info("waiting for 10 sec")
                        time.sleep(10)
                    stdout = node.run_cmd(
                        'snmpwalk -v2c -m {0} -c public localhost {1}' .format(
                            snmp_mib_files, snmp_mib_strings))
                    self.__logger.info("{}" .format(stdout))
                    if stdout is None:
                        self.__logger.info("No output from snmpwalk")
                    elif 'OID' in stdout:
                        self.__logger.info("SNMP query failed again")
                        self.__logger.info("Failing this test case")
                        return False
                    else:
                        counter2 = stdout.split()[3]

                    if counter1 == counter2:
                        return False
                    else:
                        return True
        else:
            return False
637         else:
638             return False