Implements full path for hash checks of binaries
[releng-anteater.git] / anteater / src / patch_scan.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 ##############################################################################
4 # Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
5 #
6 # All rights reserved. This program and the accompanying materials
7 # are made available under the terms of the Apache License, Version 2.0
8 # which accompanies this distribution, and is available at
9 # http://www.apache.org/licenses/LICENSE-2.0
10 ##############################################################################
11
12 """
13     Accepts the --patchset argument and iterates through each line of the
14     patchset file to perform various checks such as if the file is a binary, or
15     contains a blacklisted string. If any violations are found, the script
16     exits with code 1 and logs the violation(s) found.
17 """
18
19 from __future__ import division, print_function, absolute_import
20 from binaryornot.check import is_binary
21 import logging
22 import hashlib
23 import six.moves.configparser
24 import sys
25 import re
26
27 from . import get_lists
28
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)
# Settings are read at import time from 'anteater.conf', which is resolved
# relative to the current working directory.
config = six.moves.configparser.RawConfigParser()
config.read('anteater.conf')
# Prefix for the gate report logs. It is concatenated directly with the
# report file name, so presumably it must end with a path separator.
reports_dir = config.get('config', 'reports_dir')
# Set to True by any scan that records a violation; process_failure() reads
# it to decide whether to exit with status 1.
failure = False
# Module-wide SHA-256 object. NOTE: a hashlib object accumulates every
# update() call, so one shared instance yields the digest of all data fed
# to it so far rather than of a single file.
hasher = hashlib.sha256()
35
36
def prepare_patchset(project, patchset):
    """ Create black/white lists and default / project waivers
        and iterates over patchset file

    :param project: project name, used to look up project waivers
    :param patchset: path of a text file listing one file path per line

    Exits with status 1 if the patchset file cannot be opened, or (via
    process_failure) if any scanned file registered a violation.
    """

    # Get Various Lists / Project Waivers
    lists = get_lists.GetLists()
    # Get binary white list
    binary_list = lists.binary_list(project)

    # Get file name black list and project waivers
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)

    # Get file content black list and project waivers
    master_list, ignore_list = lists.file_content_list(project)

    # Get File Ignore Lists
    file_ignore = lists.file_ignore()

    # Get Licence Lists
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Open patch set to get file list; the context manager guarantees the
    # handle is closed once the lines have been read.
    try:
        with open(patchset, 'r') as fo:
            lines = fo.readlines()
    except IOError:
        logger.error('%s does not exist', patchset)
        sys.exit(1)

    # Process each file in patch set using waivers generated above
    for line in lines:
        patch_file = line.strip('\n')
        # A blank line (e.g. a trailing newline at the end of the patchset)
        # is not a file path; skip it instead of trying to scan ''.
        if not patch_file.strip():
            continue
        # Perform binary and file / content checks
        scan_patch(project, patch_file, binary_list,
                   file_audit_list, file_audit_project_list,
                   master_list, ignore_list, licence_ext,
                   file_ignore, licence_ignore)

    # Process final result
    process_failure()
78
79
def scan_patch(project, patch_file, binary_list, file_audit_list,
               file_audit_project_list, master_list,
               ignore_list, licence_ext, file_ignore, licence_ignore):
    """ Scan actions for each committed file in patch set

    Binary files (as reported by is_binary) are checked against the binary
    whitelist pattern and, failing that, against the SHA-256 hashes returned
    by GetLists.binary_hash. All other files are checked for blacklisted
    names, blacklisted content and a licence header.

    Every violation is logged, appended to a report under reports_dir and
    recorded by setting the module-level ``failure`` flag.

    :param project: project name; used for waiver lookup and report names
    :param patch_file: path of the file to scan
    :param binary_list: pattern object whose search() whitelists binaries
    :param file_audit_list: pattern object matching blacklisted file names
    :param file_audit_project_list: pattern object holding project waivers
    :param master_list: mapping whose values hold 'regex' and 'desc' entries
    :param ignore_list: regular expression waiving flagged content lines
    :param licence_ext: file suffixes that require a licence header
    :param file_ignore: file suffixes excluded from content/licence checks
    :param licence_ignore: paths excluded from the licence check
    """
    global failure
    if is_binary(patch_file):
        hashlist = get_lists.GetLists()
        # Waived hashes are looked up by the path relative to the project
        split_path = patch_file.split(project + '/', 1)[-1]
        binary_hash = hashlist.binary_hash(project, split_path)
        if not binary_list.search(patch_file):
            # Use a fresh hash object per file: a shared one would keep
            # accumulating data and give every binary after the first the
            # digest of all files hashed so far.
            file_hasher = hashlib.sha256()
            with open(patch_file, 'rb') as afile:
                file_hasher.update(afile.read())
            digest = file_hasher.hexdigest()
            if digest in binary_hash:
                logger.info('Found matching file hash for file: %s',
                            patch_file)
            else:
                logger.error('Non Whitelisted Binary file: %s',
                             patch_file)
                logger.error('Submit patch with the following hash: %s',
                             digest)
                # Only an unmatched hash is a violation; a binary whose
                # hash is waived must not fail the gate.
                failure = True
                with open(reports_dir + "binaries-" + project + ".log",
                          "a") as gate_report:
                    gate_report.write('Non Whitelisted Binary file: {0}\n'.
                                      format(patch_file))
                    gate_report.write(
                        'Submit patch with the following hash: {0}\n'.
                        format(digest))
        return

    # Check file names / extensions
    match = file_audit_list.search(patch_file)
    if match and not file_audit_project_list.search(patch_file):
        logger.error('Blacklisted file: %s', patch_file)
        logger.error('Matched String: %s', match.group())
        failure = True
        with open(reports_dir + "file-names_" + project + ".log", "a") \
                as gate_report:
            gate_report.write('Blacklisted file: {0}\n'.
                              format(patch_file))
            # Terminate the record so consecutive entries do not run
            # together on one line.
            gate_report.write('Matched String: {0}\n'.
                              format(match.group()))

    # Open file to check for blacklisted content; a file that cannot be
    # opened is skipped for both the content and the licence check.
    try:
        with open(patch_file, 'r') as fo:
            lines = fo.readlines()
    except IOError:
        return

    if patch_file.endswith(tuple(file_ignore)):
        return

    for line in lines:
        # Only the values are needed; .values() works on Python 2 and 3
        for value in master_list.values():
            regex = value['regex']
            desc = value['desc']
            if re.search(regex, line) and not re.search(
                    ignore_list, line):
                logger.error('File contains violation: %s', patch_file)
                logger.error('Flagged Content: %s', line.rstrip())
                logger.error('Matched Regular Exp: %s', regex)
                logger.error('Rationale: %s', desc.rstrip())
                failure = True
                with open(reports_dir + "contents_" + project + ".log",
                          "a") as gate_report:
                    gate_report.write('File contains violation: {0}\n'.
                                      format(patch_file))
                    # The last line of a file may lack a newline, so
                    # normalise to exactly one.
                    gate_report.write('Flagged Content: {0}\n'.
                                      format(line.rstrip('\n')))
                    gate_report.write('Matched Regular Exp: {0}\n'.
                                      format(regex))
                    gate_report.write('Rationale: {0}\n'.
                                      format(desc.rstrip()))
    # Run license check
    licence_check(project, licence_ext, licence_ignore, patch_file)
156
157
def licence_check(project, licence_ext,
                  licence_ignore, patch_file):
    """ Performs licence checks

    Files whose name ends with one of ``licence_ext`` and that are not
    listed in ``licence_ignore`` must contain one of the accepted licence
    markers (matched case-insensitively). A missing marker is logged,
    appended to the project's licence report and recorded by setting the
    module-level ``failure`` flag.

    :param project: project name, used in the report file name
    :param licence_ext: file suffixes that require a licence header
    :param licence_ignore: paths exempt from the licence check
    :param patch_file: path of the file to check
    """
    global failure
    if not patch_file.endswith(tuple(licence_ext)) \
            or patch_file in licence_ignore:
        return

    # Read through a context manager so the handle is always closed, and
    # lower-case the content once rather than once per pattern.
    with open(patch_file, 'r') as fo:
        content = fo.read().lower()

    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
    # of a decision made at 2017 plugfest to limit searches to
    # just these two strings.
    patterns = ['copyright', 'spdx',
                'http://creativecommons.org/licenses/by/4.0']
    if any(i in content for i in patterns):
        logger.info('Contains needed Licence string: %s', patch_file)
    else:
        logger.error('Licence header missing in file: %s', patch_file)
        failure = True
        with open(reports_dir + "licence-" + project + ".log", "a") \
                as gate_report:
            gate_report.write('Licence header missing in file: {0}\n'.
                              format(patch_file))
180
181
def process_failure():
    """ Terminate with exit status 1 when a scan has flagged a violation,
        so that jjb registers the job as failed; otherwise do nothing """
    if not failure:
        return
    logger.error('Please visit: https://wiki.opnfv.org/x/5oey')
    sys.exit(1)