Initial code push of Anteater
[releng-anteater.git] / anteater / src / patch_scan.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 ##############################################################################
4 # Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
5 #
6 # All rights reserved. This program and the accompanying materials
7 # are made available under the terms of the Apache License, Version 2.0
8 # which accompanies this distribution, and is available at
9 # http://www.apache.org/licenses/LICENSE-2.0
10 ##############################################################################
11
12 """
13     Accepts the --patchset argument and iterates through each line of the
14     patchset file to perform various checks such as if the file is a binary, or
15     contains a blacklisted string. If any violations are found, the script
16     exits with code 1 and logs the violation(s) found.
17 """
18
19 from __future__ import division, print_function, absolute_import
20 from binaryornot.check import is_binary
21 import anteater.utils.anteater_logger as antlog
22 import anteater.src.get_lists as get_lists
23 import ConfigParser
24 import sys
25 import re
26
27
# Module-level logger obtained from the project's anteater_logger wrapper.
logger = antlog.Logger(__name__).getLogger()
# Settings come from 'anteater.conf'; the relative path means the file is
# looked up in the current working directory when this module is imported.
config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
# Prefix for every report file written by the scan functions. It is joined
# to the report file names by plain string concatenation, so it presumably
# ends with a path separator -- verify against anteater.conf.
reports_dir = config.get('config', 'reports_dir')
# Set to True by the scan functions when a violation is found and read by
# process_failure() to decide whether to exit with an error code.
failure = False
33
34
def prepare_patchset(project, patchset):
    """ Create black/white lists and default / project waivers
        and iterates over patchset file

        Every non-empty line of the patchset file is treated as the path of
        a file to scan and is handed to scan_patch(). Once all files have
        been scanned, process_failure() is called, which exits the process
        with code 1 if any scan registered a failure.

        :param project: project name, passed to the list getters to select
                        the project specific waivers and used in the names
                        of the report files
        :param patchset: path of a text file listing one file path per line
    """

    # Get Various Lists / Project Waivers
    lists = get_lists.GetLists()
    # Get binary white list
    binary_list, binary_project_list = lists.binary_list(project)

    # Get file name black list and project waivers
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)

    # Get file content black list and project waivers
    file_content_list, \
        file_content_project_list = lists.file_content_list(project)

    # Get Licence Lists
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Read the file list up front so the patchset handle is closed before
    # any scanning starts.
    with open(patchset, 'r') as fo:
        patch_files = [line.strip('\n') for line in fo]

    # Process each file in patch set using waivers generated above
    for patch_file in patch_files:
        if not patch_file:
            # A blank line is not a file path; scanning it would only fail
            # when trying to open an empty file name.
            continue
        # Perform binary and file / content checks
        scan_patch(project, patch_file, binary_list, binary_project_list,
                   file_audit_list, file_audit_project_list,
                   file_content_list, file_content_project_list, licence_ext,
                   licence_ignore)

    # Process final result
    process_failure()
70
71
def scan_patch(project, patch_file, binary_list, binary_project_list,
               file_audit_list, file_audit_project_list, file_content_list,
               file_content_project_list, licence_ext, licence_ignore):
    """ Scan actions for each committed file in patch set

        A binary file is only checked against the binary whitelists. Any
        other file has its name checked against the file name blacklist,
        each of its lines checked against the content blacklist, and is
        then passed to licence_check(). Every violation is logged, appended
        to the matching report file under reports_dir and recorded in the
        module-level failure flag.

        :param project: project name, used in the report file names
        :param patch_file: path of the file to scan
        :param binary_list: object with a search() method (presumably a
                            compiled regex) matching whitelisted binaries
        :param binary_project_list: as binary_list, for the project waivers
        :param file_audit_list: object with a search() method matching
                                blacklisted file names
        :param file_audit_project_list: search()-able project waivers for
                                        the file name blacklist
        :param file_content_list: object with a search() method matching
                                  blacklisted content
        :param file_content_project_list: search()-able project waivers for
                                          the content blacklist
        :param licence_ext: file extensions subject to the licence check
        :param licence_ignore: file paths exempt from the licence check
    """
    global failure
    if is_binary(patch_file):
        if not binary_list.search(patch_file) and not binary_project_list\
                .search(patch_file):
            logger.error('Non Whitelisted Binary file: {0}'.
                         format(patch_file))
            failure = True
            with open(reports_dir + "binaries-" + project + ".log", "a") \
                    as gate_report:
                gate_report.write('Non Whitelisted Binary file: {0}\n'.
                                  format(patch_file))
        # Binary files get no name, content or licence checks.
        return

    # Check file names / extensions (search once and reuse the match)
    match = file_audit_list.search(patch_file)
    if match and not file_audit_project_list.search(patch_file):
        logger.error('Blacklisted file: {0}'.
                     format(patch_file))
        logger.error('Matched String: {0}'.
                     format(match.group()))
        failure = True
        with open(reports_dir + "file-names_" + project + ".log", "a") \
                as gate_report:
            gate_report.write('Blacklisted file: {0}\n'.
                              format(patch_file))
            # Terminate the entry so the next one starts on its own line.
            gate_report.write('Matched String: {0}\n'.
                              format(match.group()))

    # Open file to check for blacklisted content; the with block makes
    # sure the handle is closed once every line has been examined.
    with open(patch_file, 'r') as fo:
        for line in fo:
            match = file_content_list.search(line)
            if not match or file_content_project_list.search(line):
                continue
            logger.error('File contains violation: {0}'.
                         format(patch_file))
            logger.error('Flagged Content: {0}'.
                         format(line.rstrip()))
            logger.error('Matched String: {0}'.
                         format(match.group()))
            failure = True
            with open(reports_dir + "contents_" + project + ".log",
                      "a") as gate_report:
                gate_report.write('File contains violation: {0}\n'.
                                  format(patch_file))
                # The last line of a file may lack a newline; always end
                # the entry with exactly one.
                gate_report.write('Flagged Content: {0}\n'.
                                  format(line.rstrip('\n')))
                gate_report.write('Matched String: {0}\n'.
                                  format(match.group()))

    # Run license check
    licence_check(project, licence_ext, licence_ignore, patch_file)
130
131
def licence_check(project, licence_ext,
                  licence_ignore, patch_file):
    """ Performs licence checks

        Files whose name ends with one of licence_ext and that are not
        listed in licence_ignore must contain the string 'copyright' or
        'spdx' (case insensitive). A file without either string is logged,
        appended to the licence report under reports_dir and recorded in
        the module-level failure flag. All other files are left alone.

        :param project: project name, used in the report file name
        :param licence_ext: iterable of file name suffixes to check
        :param licence_ignore: container of file paths to skip
        :param patch_file: path of the file to check
    """
    global failure
    if not patch_file.endswith(tuple(licence_ext)) \
            or patch_file in licence_ignore:
        return

    # Read the whole file and release the handle before reporting.
    with open(patch_file, 'r') as fo:
        content = fo.read()

    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
    # of a decision made at 2017 plugfest to limit searches to
    # just these two strings.
    if re.search("copyright|spdx", content, re.IGNORECASE):
        logger.info('Contains needed Licence string: {0}'.
                    format(patch_file))
        return

    logger.error('Licence header missing in file: {0}'.
                 format(patch_file))
    failure = True
    with open(reports_dir + "licence-" + project + ".log", "a") \
            as gate_report:
        gate_report.write('Licence header missing in file: {0}\n'.
                          format(patch_file))
157
158
def process_failure():
    """ Terminate with exit code 1 when the module-level failure flag has
        been set by a scan, so that jjb registers the run as failed.
        Returns normally when no failure was registered. """
    if not failure:
        return
    logger.error('Failures registered')
    sys.exit(1)