51b3430e52a3cc864d6eb35bcde51671d3578a0c
[releng-anteater.git] / anteater / src / patch_scan.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 ##############################################################################
4 # Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
5 #
6 # All rights reserved. This program and the accompanying materials
7 # are made available under the terms of the Apache License, Version 2.0
8 # which accompanies this distribution, and is available at
9 # http://www.apache.org/licenses/LICENSE-2.0
10 ##############################################################################
11
12 """
13     Accepts the --patchset argument and iterates through each line of the
14     patchset file to perform various checks such as if the file is a binary, or
15     contains a blacklisted string. If any violations are found, the script
16     exits with code 1 and logs the violation(s) found.
17 """
18
19 from __future__ import division, print_function, absolute_import
20 from binaryornot.check import is_binary
21 import anteater.utils.anteater_logger as antlog
22 import anteater.src.get_lists as get_lists
23 import ConfigParser
24 import hashlib
25 import sys
26 import re
27
28
# Module logger, obtained through the project's anteater_logger wrapper.
logger = antlog.Logger(__name__).getLogger()
# Settings come from 'anteater.conf'. The path is relative, so it is resolved
# against the current working directory at import time. RawConfigParser.read()
# silently skips files it cannot open; the get() below then raises.
config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
# Prefix for the report files. It is combined with file names by plain string
# concatenation in this module, so it presumably has to end with a path
# separator -- TODO confirm against the shipped anteater.conf.
reports_dir = config.get('config', 'reports_dir')
# Set to True by the scan functions when a violation is found and read by
# process_failure() to decide whether to exit with status 1.
failure = False
# Module-level SHA-256 object.
# NOTE(review): a hashlib object accumulates every update() call, so sharing
# one instance across several files yields the digest of their concatenation;
# per-file digests need a fresh hashlib.sha256() for each file.
hasher = hashlib.sha256()
35
36
def prepare_patchset(project, patchset):
    """ Create black/white lists and default / project waivers, then scan
        every file named in the patchset file.

        project:  project name, passed to the list getters and to the
                  scanner.
        patchset: path of a text file holding one file path per line.

        Always finishes by calling process_failure(), which exits the
        process with status 1 if any scan registered a violation.
    """
    # Get various lists / project waivers
    lists = get_lists.GetLists()

    # Binary white list
    binary_list = lists.binary_list(project)

    # File name black list and project waivers
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)

    # File content black list and project waivers
    file_content_list, file_content_project_list = \
        lists.file_content_list(project)

    # Licence lists
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Read the whole file list up front so the handle is closed before any
    # scanning (and any early exit) happens.
    with open(patchset, 'r') as patchset_file:
        patch_files = [entry.strip('\n') for entry in patchset_file]

    # Process each file in the patch set using the waivers generated above
    for patch_file in patch_files:
        # A blank line (e.g. a trailing newline) does not name a file;
        # passing it on would only fail when the scanner tries to open it.
        if not patch_file.strip():
            continue
        scan_patch(project, patch_file, binary_list,
                   file_audit_list, file_audit_project_list,
                   file_content_list, file_content_project_list, licence_ext,
                   licence_ignore)

    # Process final result
    process_failure()
72
73
def scan_patch(project, patch_file, binary_list, file_audit_list,
               file_audit_project_list, file_content_list,
               file_content_project_list, licence_ext, licence_ignore):
    """ Scan actions for each committed file in patch set.

        Binary files (as reported by is_binary) pass when their path matches
        binary_list or when their SHA-256 digest is among the hashes returned
        by GetLists().binary_hash(project, patch_file). Any other binary is
        logged, written to the binaries report and marks the run as failed.

        Non-binary files are checked three ways: the file name against
        file_audit_list, every line against file_content_list (a match on
        the corresponding project list waives the hit), and finally the
        licence check.

        The list arguments only need a search() method returning a match
        object or None (presumably compiled regexes -- confirm in
        get_lists). Violations set the module-level ``failure`` flag and
        are appended to report files under reports_dir; nothing is returned.
    """
    global failure
    if is_binary(patch_file):
        hashlist = get_lists.GetLists()
        binary_hash = hashlist.binary_hash(project, patch_file)
        if not binary_list.search(patch_file):
            # hashlib objects accumulate every update(), so a fresh one is
            # needed per file; a shared instance would digest the
            # concatenation of all binaries scanned so far.
            file_hasher = hashlib.sha256()
            with open(patch_file, 'rb') as afile:
                file_hasher.update(afile.read())
            file_digest = file_hasher.hexdigest()

            if file_digest in binary_hash:
                logger.info('Found matching file hash for file: {0}'.
                            format(patch_file))
            else:
                logger.error('Non Whitelisted Binary file: {0}'.
                             format(patch_file))
                logger.error('Please submit patch with this hash:: {0}'.
                             format(file_digest))
                # Only an unknown hash is a violation; a whitelisted hash
                # must not fail the run or show up in the report.
                failure = True
                with open(reports_dir + "binaries-" + project + ".log",
                          "a") as gate_report:
                    gate_report.write('Non Whitelisted Binary file: {0}\n'.
                                      format(patch_file))
        # Name, content and licence checks apply to non-binary files only.
        return

    # Check file names / extensions
    name_match = file_audit_list.search(patch_file)
    if name_match and not file_audit_project_list.search(patch_file):
        logger.error('Blacklisted file: {0}'.
                     format(patch_file))
        logger.error('Matched String: {0}'.
                     format(name_match.group()))
        failure = True
        with open(reports_dir + "file-names_" + project + ".log", "a") \
                as gate_report:
            gate_report.write('Blacklisted file: {0}\n'.
                              format(patch_file))
            # Terminate the record so the next appended entry starts on
            # its own line.
            gate_report.write('Matched String: {0}\n'.
                              format(name_match.group()))

    # Check file content line by line for blacklisted strings
    with open(patch_file, 'r') as content_file:
        for line in content_file:
            content_match = file_content_list.search(line)
            if not content_match or file_content_project_list.search(line):
                continue
            logger.error('File contains violation: {0}'.
                         format(patch_file))
            logger.error('Flagged Content: {0}'.
                         format(line.rstrip()))
            logger.error('Matched String: {0}'.
                         format(content_match.group()))
            failure = True
            with open(reports_dir + "contents_" + project + ".log",
                      "a") as gate_report:
                gate_report.write('File contains violation: {0}\n'.
                                  format(patch_file))
                # Normalise to exactly one newline: the last line of a
                # file may not end with one.
                gate_report.write('Flagged Content: {0}\n'.
                                  format(line.rstrip('\n')))
                gate_report.write('Matched String: {0}\n'.
                                  format(content_match.group()))

    # Run license check
    licence_check(project, licence_ext, licence_ignore, patch_file)
142
143
def licence_check(project, licence_ext,
                  licence_ignore, patch_file):
    """ Performs licence checks.

        project:        project name, used in the report file name.
        licence_ext:    iterable of file name suffixes that need a licence
                        header.
        licence_ignore: container of file paths exempt from the check.
        patch_file:     path of the file to inspect.

        Files whose name does not end with one of licence_ext, or that are
        listed in licence_ignore, are skipped. Otherwise the file must
        contain 'copyright' or 'spdx' (case-insensitive); if it does not,
        the problem is logged, appended to the licence report and the
        module-level ``failure`` flag is set. Returns None.
    """
    global failure
    if not patch_file.endswith(tuple(licence_ext)) \
            or patch_file in licence_ignore:
        return

    with open(patch_file, 'r') as source_file:
        content = source_file.read()

    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
    # of a decision made at 2017 plugfest to limit searches to
    # just these two strings.
    if re.search("copyright|spdx", content, re.IGNORECASE):
        logger.info('Contains needed Licence string: {0}'.
                    format(patch_file))
        return

    logger.error('Licence header missing in file: {0}'.
                 format(patch_file))
    failure = True
    with open(reports_dir + "licence-" + project + ".log", "a") \
            as gate_report:
        gate_report.write('Licence header missing in file: {0}\n'.
                          format(patch_file))
169
170
def process_failure():
    """ Turn a recorded scan violation into a failing exit status.

        When the module-level ``failure`` flag is set, the help URL is
        logged and sys.exit(1) is called so that jjb registers a failure.
        When it is not set the function simply returns.
    """
    if not failure:
        return

    logger.error('Please visit: https://wiki.opnfv.org/x/5oey')
    sys.exit(1)