Adding improvements to the log messages
[barometer.git] / baro_tests / config_server.py
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Classes used by collectd.py"""

import paramiko
import time
import os.path
import os
import re
from opnfv.deployment import factory
ID_RSA_PATH = '/root/.ssh/id_rsa'
SSH_KEYS_SCRIPT = '/home/opnfv/barometer/baro_utils/get_ssh_keys.sh'
DEF_PLUGIN_INTERVAL = 10
COLLECTD_CONF = '/etc/collectd.conf'
COLLECTD_CONF_DIR = '/etc/collectd/collectd.conf.d'
NOTIFICATION_FILE = '/var/log/python-notifications.dump'
COLLECTD_NOTIFICATION = '/etc/collectd_notification_dump.py'
APEX_IP = os.getenv("INSTALLER_IP").rstrip('\n')
APEX_USER = 'root'
APEX_USER_STACK = 'stack'
APEX_PKEY = '/root/.ssh/id_rsa'


class Node(object):
    """Node configuration class"""
    def __init__(self, attrs):
        self.__null = attrs[0]
        self.__id = attrs[1]
        self.__name = attrs[2]
        self.__status = attrs[3] if attrs[3] else None
        self.__taskState = attrs[4]
        self.__pwrState = attrs[5]
        self.__ip = re.sub('^[a-z]+=', '', attrs[6])
        # 'nova list' does not report roles, so derive them from the node
        # name; this keeps get_roles() from raising AttributeError.
        self.__roles = [role for role in ('controller', 'compute')
                        if role in self.__name]

    def get_name(self):
        """Get node name"""
        return self.__name

    def get_id(self):
        """Get node ID"""
        return self.__id

    def get_ip(self):
        """Get node IP address"""
        return self.__ip

    def get_roles(self):
        """Get node roles"""
        return self.__roles


def get_apex_nodes():
    handler = factory.Factory.get_handler('apex',
                                          APEX_IP,
                                          APEX_USER_STACK,
                                          APEX_PKEY)
    nodes = handler.get_nodes()
    return nodes
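
# A minimal usage sketch (illustrative only): nearly every check in
# ConfigServer below follows the same pattern of looking up an overcloud
# node returned by get_apex_nodes() by name and then running a shell
# command on it. The node name used here is a hypothetical example.
#
#     nodes = get_apex_nodes()
#     for node in nodes:
#         if node.get_dict()['name'] == 'overcloud-novacompute-0':
#             uptime = node.run_cmd('uptime')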


class ConfigServer(object):
    """Class to get env configuration"""
    def __init__(self, host, user, logger, priv_key=None):
        self.__host = host
        self.__user = user
        self.__passwd = None
        self.__priv_key = priv_key
        self.__nodes = list()
        self.__logger = logger

        self.__private_key_file = ID_RSA_PATH
        if not os.path.isfile(self.__private_key_file):
            self.__logger.error(
                "Private key file '{}' not found.".format(
                    self.__private_key_file))
            raise IOError("Private key file '{}' not found.".format(
                self.__private_key_file))

        # get list of available nodes
        ssh, sftp = self.__open_sftp_session(
            self.__host, self.__user, self.__passwd)
        attempt = 1
        node_list_passed = False

        while (attempt <= 10) and not node_list_passed:
            stdin, stdout, stderr = ssh.exec_command(
                "source stackrc; nova list")
            stderr_lines = stderr.readlines()
            if stderr_lines:
                self.__logger.warning(
                    "'nova list' command failed (try {}):".format(attempt))
                for line in stderr_lines:
                    self.__logger.debug(line.strip())
            else:
                node_list_passed = True
                if attempt > 1:
                    self.__logger.info(
                        "'nova list' command passed (try {})".format(attempt))
            attempt += 1
        if not node_list_passed:
            self.__logger.error(
                "'nova list' command failed. This was the last try.")
            raise OSError(
                "'nova list' command failed. This was the last try.")
        node_table = stdout.readlines()

        # skip the table header and separators, parse the table values
        for entry in node_table[3:]:
            if entry[0] == '+' or entry[0] == '\n':
                # table separator or blank line, nothing to parse
                self.__logger.debug(entry.strip())
            else:
                self.__nodes.append(
                    Node([str(x.strip(' \n')) for x in entry.split('|')]))

    def get_controllers(self):
        """Get list of controller nodes"""
        return (
            [node for node in self.__nodes if 'controller' in node.get_name()])

    def get_computes(self):
        """Get list of compute nodes"""
        return (
            [node for node in self.__nodes if 'compute' in node.get_name()])

    def get_nodes(self):
        """Get list of all parsed nodes"""
        return self.__nodes

    def __open_sftp_session(self, host, user, passwd=None):
        """Connect to given host.

        Keyword arguments:
        host -- host to connect
        user -- user to use
        passwd -- password to use

        Return tuple of SSH and SFTP client instances.
        """
        # create SSH client
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # try a direct access using password or private key
        if not passwd and not self.__priv_key:
            # get private key
            self.__priv_key = paramiko.RSAKey.from_private_key_file(
                self.__private_key_file)

        # connect to the server
        ssh.connect(
            host, username=user, password=passwd, pkey=self.__priv_key)
        sftp = ssh.open_sftp()

        # return SSH and SFTP client instances
        return ssh, sftp

    def get_plugin_interval(self, compute, plugin):
        """Find the plugin interval in collectd configuration.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name

        If found, return interval value, otherwise the default value"""
        default_interval = DEF_PLUGIN_INTERVAL
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                if stdout is None:
                    return default_interval
                for line in stdout.split('\n'):
                    if 'Interval' in line:
                        # line is expected to look like 'Interval 10'
                        return int(line.split()[1].strip('"'))
        return default_interval

    def get_plugin_config_values(self, compute, plugin, parameter):
        """Get parameter values from collectd config file.

        Keyword arguments:
        compute -- compute node instance
        plugin -- plug-in name
        parameter -- plug-in parameter

        Return the found values, or an empty list if nothing is found."""
        default_values = []
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'cat /etc/collectd/collectd.conf.d/{}.conf'.format(plugin))
                if stdout is None:
                    return default_values
                for line in stdout.split('\n'):
                    if ('Interfaces' in line or 'Bridges' in line
                            or 'Cores' in line):
                        return line.split(' ', 1)[1]
        return default_values

    def execute_command(self, command, host_ip=None, ssh=None):
        """Execute command on node and return list of lines of standard output.

        Keyword arguments:
        command -- command
        host_ip -- IP of the node
        ssh -- existing open SSH session to use

        One of host_ip or ssh must not be None. If both are not None,
        existing ssh session is used.
        """
        if host_ip is None and ssh is None:
            raise ValueError('One of host_ip or ssh must not be None.')
        if ssh is None:
            ssh, sftp = self.__open_sftp_session(host_ip, 'root', 'opnfvapex')
        stdin, stdout, stderr = ssh.exec_command(command)
        return stdout.readlines()
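
    # Illustrative only: execute_command() can open a throwaway SSH session
    # from an IP address, or reuse an already-open paramiko session. The IP
    # address and command below are placeholders.
    #
    #     output = config_server.execute_command(
    #         'cat /etc/collectd/collectd.conf.d/csv.conf',
    #         host_ip='192.0.2.10')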

    def get_ovs_interfaces(self, compute):
        """Get list of configured OVS interfaces

        Keyword arguments:
        compute -- compute node instance
        """
        stdout = None
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd('sudo ovs-vsctl list-br')
        return stdout

    def is_gnocchi_running(self, controller):
        """Check whether Gnocchi is running on controller.

        Keyword arguments:
        controller -- controller node instance

        Return boolean value whether Gnocchi is running.
        """
        gnocchi_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if controller_name == node.get_dict()['name']:
                node.put_file(
                    '/home/opnfv/functest/conf/openstack.creds',
                    'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep gnocchi")
                if stdout is None:
                    return False
                elif 'gnocchi' in stdout:
                    gnocchi_present = True
                    return gnocchi_present
                else:
                    return False
        return gnocchi_present

    def is_aodh_running(self, controller):
        """Check whether aodh service is running on controller
        """
        aodh_present = False
        controller_name = controller.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if controller_name == node.get_dict()['name']:
                node.put_file(
                    '/home/opnfv/functest/conf/openstack.creds',
                    'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "openstack catalog list | grep aodh")
                if stdout is None:
                    return False
                elif 'aodh' in stdout:
                    aodh_present = True
                    return aodh_present
                else:
                    return False
        return aodh_present

    def is_mcelog_installed(self, compute, package):
        """Check whether package exists on compute node.

        Keyword arguments:
        compute -- compute node instance
        package -- Linux package to search for

        Return boolean value whether package is installed.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'rpm -qa | grep {}'.format(package))
                if stdout is None:
                    return False
                elif package in stdout:
                    return True
                else:
                    return False
        return False

    def is_libpqos_on_node(self, compute):
        """Check whether libpqos is present on compute node"""

        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd('ls /usr/local/lib/ | grep libpqos')
                if stdout and 'libpqos' in stdout:
                    return True
        return False

    def check_aodh_plugin_included(self, compute):
        """Check if the aodh plugin is included in the collectd config.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value indicating whether the aodh plugin is included.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                aodh_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'aodh.conf' not in aodh_conf:
                    self.__logger.info(
                        "AODH plugin not included in {}".format(compute_name))
                    return False
                else:
                    self.__logger.info(
                        "AODH plugin present in compute node {}".format(
                            compute_name))
                    return True
        return True

    def check_gnocchi_plugin_included(self, compute):
        """Check if the gnocchi plugin is included in the collectd config.

        Keyword arguments:
        compute -- compute node instance

        Return boolean value indicating whether the gnocchi plugin
        is included.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                gnocchi_conf = node.run_cmd('ls /etc/collectd/collectd.conf.d')
                if 'collectd-ceilometer-plugin.conf' not in gnocchi_conf:
                    self.__logger.info(
                        "Gnocchi plugin not included in node {}".format(
                            compute_name))
                    return False
                else:
                    self.__logger.info(
                        "Gnocchi plugin available in compute node {}".format(
                            compute_name))
                    return True
        return True

    def check_snmp_plugin_included(self, compute):
        """Check if SNMP plugin is active in compute node.
        """
        snmp_mib = '/usr/share/snmp/mibs/Intel-Rdt.txt'
        snmp_string = 'INTEL-RDT-MIB::intelRdt'
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}'.format(
                        snmp_mib, snmp_string))
                self.__logger.info("snmp output = {}".format(stdout))
                if stdout is None or 'OID' in stdout:
                    return False
                else:
                    return True
        return False

    def enable_plugins(
            self, compute, plugins, error_plugins, create_backup=True):
        """Enable plugins on compute node

        Keyword arguments:
        compute -- compute node instance
        plugins -- list of plugins to be enabled

        Return boolean value indicating whether function was successful.
        """
        csv_file = os.path.dirname(os.path.realpath(__file__)) + '/csv.conf'
        plugins = sorted(plugins)
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        for node in nodes:
            if compute_name == node.get_dict()['name']:
                node.put_file(csv_file, 'csv.conf')
                node.run_cmd(
                    'sudo cp csv.conf '
                    + '/etc/collectd/collectd.conf.d/csv.conf')
        return True

    def restart_collectd(self, compute):
        """Restart collectd on compute node.

        Keyword arguments:
        compute -- compute node instance

        Return tuple with boolean indicating success and list of warnings
        received during collectd start.
        """
        compute_name = compute.get_name()
        nodes = get_apex_nodes()
        warning = []

        def get_collectd_processes(compute_node):
            """Get number of running collectd processes.

            Keyword arguments:
            compute_node -- node instance on which to check for processes
            """
            stdout = compute_node.run_cmd("pgrep collectd")
            return len(stdout) if stdout else 0

        for node in nodes:
            if compute_name == node.get_dict()['name']:
                self.__logger.info('Stopping collectd service...')
                node.run_cmd('sudo systemctl stop collectd')
                time.sleep(10)
                if get_collectd_processes(node):
                    self.__logger.error('Collectd is still running...')
                    return False, []
                self.__logger.info('Starting collectd service...')
                stdout = node.run_cmd('sudo systemctl start collectd')
                time.sleep(10)
                if stdout is None:
                    stdout = ''
                warning = [
                    line.strip() for line in stdout.splitlines()
                    if 'WARN: ' in line]
                if get_collectd_processes(node) == 0:
                    self.__logger.error('Collectd is still not running...')
                    return False, warning
        return True, warning
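
    # Illustrative only: the (success, warnings) tuple returned by
    # restart_collectd() is typically consumed along these lines; the
    # variable names are placeholders.
    #
    #     restarted, warnings = config_server.restart_collectd(compute)
    #     for warning in warnings:
    #         logger.warning(warning)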

    def test_plugins_with_aodh(
            self, compute, plugin_interval, logger,
            criteria_list=[]):

        metric_id = {}
        timestamps1 = {}
        timestamps2 = {}
        nodes = get_apex_nodes()
        for node in nodes:
            if node.is_controller():
                self.__logger.info('Getting AODH alarm list on {}'.format(
                    node.get_dict()['name']))
                node.put_file(
                    '/home/opnfv/functest/conf/openstack.creds',
                    'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "aodh alarm list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                if stdout is None:
                    self.__logger.info("aodh alarm list was empty")
                    return False
                for line in stdout.splitlines():
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3; aodh alarm show {}'.format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("aodh alarm show output was empty")
                        return False
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps1 = line.split()[1]
                    time.sleep(12)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3; aodh alarm show {}".format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("aodh alarm show output was empty")
                        return False
                    for line in stdout.splitlines()[3:-1]:
                        line = line.replace('|', "")
                        if line.split()[0] == 'timestamp':
                            timestamps2 = line.split()[1]
                    if timestamps1 == timestamps2:
                        self.__logger.info(
                            "Data not updated after interval of 12 seconds")
                        return False
                    else:
                        self.__logger.info("PASS")
                        return True
        return False

    def test_plugins_with_gnocchi(
            self, compute, plugin_interval, logger,
            criteria_list=[]):

        metric_id = {}
        timestamps1 = {}
        timestamps2 = {}
        nodes = get_apex_nodes()
        sleep_time = plugin_interval + 2
        for node in nodes:
            if node.is_controller():
                self.__logger.info('Getting gnocchi metric list on {}'.format(
                    node.get_dict()['name']))
                node.put_file(
                    '/home/opnfv/functest/conf/openstack.creds',
                    'overcloudrc.v3')
                stdout = node.run_cmd(
                    "source overcloudrc.v3;"
                    + "gnocchi metric list | grep {0} | grep {1}"
                    .format(criteria_list, compute))
                if stdout is None:
                    self.__logger.info("gnocchi metric list was empty")
                    return False
                for line in stdout.splitlines():
                    line = line.replace('|', "")
                    metric_id = line.split()[0]
                    stdout = node.run_cmd(
                        'source overcloudrc.v3;gnocchi measures show {}'.format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("gnocchi measures show was empty")
                        return False
                    for line in stdout.splitlines()[3:-1]:
                        if line[0] != '+':
                            timestamps1 = line.replace('|', "")
                            timestamps1 = timestamps1.split()[0]
                    time.sleep(sleep_time)
                    stdout = node.run_cmd(
                        "source overcloudrc.v3;gnocchi measures show {}".format(
                            metric_id))
                    if stdout is None:
                        self.__logger.info("gnocchi measures show was empty")
                        return False
                    for line in stdout.splitlines()[3:-1]:
                        if line[0] != '+':
                            timestamps2 = line.replace('|', "")
                            timestamps2 = timestamps2.split()[0]
                    if timestamps1 == timestamps2:
                        self.__logger.info(
                            "Plugin interval is {}".format(plugin_interval))
                        self.__logger.info(
                            "Data not updated after {} seconds".format(
                                sleep_time))
                        return False
                    else:
                        self.__logger.info("PASS")
                        return True
        return False

    def test_plugins_with_snmp(
            self, compute, plugin_interval, logger, plugin, snmp_mib_files=[],
            snmp_mib_strings=[], snmp_in_commands=[]):

        if plugin not in ('hugepages', 'intel_rdt', 'mcelog'):
            return False
        nodes = get_apex_nodes()
        for node in nodes:
            if compute == node.get_dict()['name']:
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}'.format(
                        snmp_mib_files, snmp_mib_strings))
                self.__logger.info("{}".format(stdout))
                if stdout is None:
                    self.__logger.info("No output from snmpwalk")
                    return False
                elif 'OID' in stdout:
                    self.__logger.info("SNMP query failed")
                    return False
                else:
                    counter1 = stdout.split()[3]
                time.sleep(10)
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}'.format(
                        snmp_mib_files, snmp_mib_strings))
                self.__logger.info("{}".format(stdout))
                if stdout is None:
                    self.__logger.info("No output from snmpwalk")
                elif 'OID' in stdout:
                    self.__logger.info(
                        "SNMP query failed during second check")
                    self.__logger.info("waiting for 10 sec")
                    time.sleep(10)
                stdout = node.run_cmd(
                    'snmpwalk -v2c -m {0} -c public localhost {1}'.format(
                        snmp_mib_files, snmp_mib_strings))
                self.__logger.info("{}".format(stdout))
                if stdout is None:
                    self.__logger.info("No output from snmpwalk")
                    self.__logger.info("Failing this test case")
                    return False
                elif 'OID' in stdout:
                    self.__logger.info("SNMP query failed again")
                    self.__logger.info("Failing this test case")
                    return False
                else:
                    counter2 = stdout.split()[3]

                if counter1 == counter2:
                    return False
                else:
                    return True
        return False
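

# Hedged usage sketch, not part of the test flow: it assumes INSTALLER_IP is
# exported, the undercloud is reachable with /root/.ssh/id_rsa, and that the
# 'hugepages' plugin config exists on the computes. It only illustrates how
# ConfigServer and the node helpers fit together.
if __name__ == '__main__':
    import logging

    logging.basicConfig(level=logging.INFO)
    example_logger = logging.getLogger('config_server_example')
    config_server = ConfigServer(APEX_IP, APEX_USER_STACK, example_logger)
    for controller in config_server.get_controllers():
        example_logger.info('controller: %s (%s)',
                            controller.get_name(), controller.get_ip())
    for compute in config_server.get_computes():
        example_logger.info(
            'hugepages interval on %s: %s', compute.get_name(),
            config_server.get_plugin_interval(compute, 'hugepages'))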