2 # -*- coding: utf-8 -*-
3 ##############################################################################
4 # Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
6 # All rights reserved. This program and the accompanying materials
7 # are made available under the terms of the Apache License, Version 2.0
8 # which accompanies this distribution, and is available at
9 # http://www.apache.org/licenses/LICENSE-2.0
10 ##############################################################################
Accepts the --path argument and iterates the root directory using os.walk.
Each file is checked for being a binary or for containing a blacklisted
string. If any violations are found, the script adds them to a log file.
18 from __future__ import division, print_function, absolute_import
20 import six.moves.configparser
23 import anteater.utils.anteater_logger as antlog
24 from binaryornot.check import is_binary
26 from . import get_lists
# Module-level logger obtained through the project's logging wrapper.
logger = antlog.Logger(__name__).getLogger()

# Settings are loaded at import time from 'anteater.conf'. The path is
# relative, so it is resolved against the current working directory.
config = six.moves.configparser.RawConfigParser()
config.read('anteater.conf')
# Prefix of every report file written by this module. It is concatenated
# directly with the report file name (no separator is inserted).
reports_dir = config.get('config', 'reports_dir')
master_list = config.get('config', 'master_list')
# Directory names that are pruned from the os.walk traversals below.
ignore_dirs = ['.git']
# Module-level SHA-256 object.
# NOTE(review): a hashlib object accumulates everything passed to update(),
# so a shared instance yields a running digest, not a per-file digest.
hasher = hashlib.sha256()
def prepare_project(project, project_dir):
    """Collect the lists for *project* and run every scan over its tree.

    Args:
        project: Project name passed to each ``GetLists`` lookup and on to
            the scan functions.
        project_dir: Root directory that the scans walk.
    """
    waivers = get_lists.GetLists()

    # Binary white list.
    binaries = waivers.binary_list(project)

    # File name black list plus the project's waivers.
    name_blacklist, name_waivers = waivers.file_audit_list(project)

    # File content black list plus the project's waivers.
    content_blacklist, content_waivers = waivers.file_content_list(project)

    # Licence lists.
    header_extensions = waivers.licence_extensions()
    header_ignore = waivers.licence_ignore()

    # Rudimentary scans: file names, file contents and binaries.
    scan_file(
        project_dir=project_dir,
        project=project,
        binary_list=binaries,
        file_audit_list=name_blacklist,
        file_audit_project_list=name_waivers,
        file_content_list=content_blacklist,
        project_content_list=content_waivers,
    )

    # Licence header checks, then the top-level LICENSE file check.
    licence_check(header_extensions, header_ignore, project, project_dir)
    licence_root_check(project_dir, project)
def _append_report(prefix, project, entries):
    """Append the strings in *entries* to one of this project's reports.

    The report path is ``reports_dir + prefix + project + ".log"``. The file
    is opened in append mode so findings written earlier are kept.
    """
    with open(reports_dir + prefix + project + ".log", "a") as gate_report:
        gate_report.writelines(entries)


def _sha256_of_file(path, chunk_size=65536):
    """Return the hex SHA-256 digest of the file at *path*.

    A new hash object is created on every call, so the digest covers this
    file only. The file is read in chunks to keep memory use bounded.
    """
    digest = hashlib.sha256()
    with open(path, 'rb') as afile:
        for chunk in iter(lambda: afile.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def scan_file(project_dir, project, binary_list, file_audit_list,
              file_audit_project_list, file_content_list,
              project_content_list):
    """Search the tree for blacklisted names, content and unknown binaries.

    Every file below *project_dir* (directories named in ``ignore_dirs`` are
    skipped) goes through a file-name check, followed by a content check for
    text files or a whitelist check for binaries. Violations are logged and
    appended to the matching report file under ``reports_dir``.

    Args:
        project_dir: Root directory to walk.
        project: Project name; used in report file names and hash lookups.
        binary_list: Object with ``search(path)``; a truthy result means the
            binary is whitelisted by path.
        file_audit_list: Object with ``search(path)`` returning a match
            object for blacklisted file names.
        file_audit_project_list: Object with ``search(path)``; a truthy
            result waives a file-name hit.
        file_content_list: Object with ``search(line)`` returning a match
            object for blacklisted content.
        project_content_list: Object with ``search(line)``; a truthy result
            waives a content hit.
    """
    for root, dirs, files in os.walk(project_dir):
        # Filter out ignored directories in place so os.walk skips them.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for items in files:
            full_path = os.path.join(root, items)

            # Check for blacklisted file names; search once, reuse the match.
            match = file_audit_list.search(full_path)
            if match and not file_audit_project_list.search(full_path):
                logger.error('Blacklisted filename: {0}'.format(full_path))
                logger.error('Matched String: {0}'.format(match.group()))
                # Each record ends with a newline so that consecutive
                # findings do not run together on one line.
                _append_report("file-names_", project, [
                    'Blacklisted filename: {0}\n'.format(full_path),
                    'Matched String: {0}\n'.format(match.group()),
                ])

            if not is_binary(full_path):
                # 'with' closes the handle once the file has been scanned.
                with open(full_path, 'r') as fo:
                    for line in fo:
                        # Check for sensitive content in project files.
                        match = file_content_list.search(line)
                        if not match or project_content_list.search(line):
                            continue
                        flagged = line.rstrip()
                        logger.error('File contains violation: {0}'.
                                     format(full_path))
                        logger.error('Flagged Content: {0}'.format(flagged))
                        logger.error('Matched String: {0}'.
                                     format(match.group()))
                        _append_report("contents-", project, [
                            'File contains violation: {0}\n'.
                            format(full_path),
                            'Flagged Content: {0}\n'.format(flagged),
                            'Matched String: {0}\n'.format(match.group()),
                        ])
                continue

            # Check if Binary is whitelisted
            # NOTE(review): presumably binary_hash is a collection of hex
            # digests waived for this project; confirm against GetLists.
            hashlist = get_lists.GetLists()
            binary_hash = hashlist.binary_hash(project, full_path)
            if binary_list.search(full_path):
                continue

            file_hash = _sha256_of_file(full_path)
            if file_hash in binary_hash:
                logger.info('Found matching file hash for file: {0}'.
                            format(full_path))
            else:
                logger.error('Non Whitelisted Binary file: {0}'.
                             format(full_path))
                logger.error('Please submit patch with this hash: {0}'.
                             format(file_hash))
                _append_report("binaries-", project, [
                    'Non Whitelisted Binary: {0}\n'.format(full_path),
                ])
def licence_root_check(project_dir, project):
    """Check that a LICENSE file exists directly under *project_dir*.

    The outcome is logged. When the file is absent, the finding is also
    written to the project's licence report under ``reports_dir``.

    Args:
        project_dir: Root directory of the project being checked.
        project: Project name used in the report file name.
    """
    licence_path = project_dir + '/LICENSE'
    if os.path.isfile(licence_path):
        logger.info('LICENSE file present in: {0}'.format(project_dir))
        return

    logger.error('LICENSE file missing in: {0}'.format(project_dir))
    report_name = reports_dir + "licence-" + project + ".log"
    with open(report_name, "a") as gate_report:
        gate_report.write(
            'LICENSE file missing in: {0}\n'.format(project_dir))
def licence_check(licence_ext, licence_ignore, project, project_dir):
    """Perform basic checks for the presence of licence strings.

    Walks *project_dir* (directories named in ``ignore_dirs`` are skipped).
    Every non-binary file whose name ends with one of *licence_ext* and is
    not listed in *licence_ignore* is read and searched, ignoring case, for
    'copyright' or 'spdx'. Files with neither string are logged as errors
    and appended to the project's licence report under ``reports_dir``.

    Args:
        licence_ext: Iterable of file name suffixes that need a header.
        licence_ignore: Container of file names exempt from the check.
        project: Project name used in the report file name.
        project_dir: Root directory to walk.
    """
    # str.endswith needs a tuple; build it once instead of once per file.
    extensions = tuple(licence_ext)
    # Note: Hardcoded use of 'copyright' & 'spdx' is the result
    # of a decision made at 2017 plugfest to limit searches to
    # just these two strings.
    licence_marker = re.compile("copyright|spdx", re.IGNORECASE)

    for root, dirs, files in os.walk(project_dir):
        # Prune ignored directories in place so os.walk skips them.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for file_name in files:
            if not file_name.endswith(extensions) \
                    or file_name in licence_ignore:
                continue
            full_path = os.path.join(root, file_name)
            if is_binary(full_path):
                continue

            # 'with' closes the handle as soon as the content is read.
            with open(full_path, 'r') as fo:
                content = fo.read()

            if licence_marker.search(content):
                logger.info('Licence string present: {0}'.
                            format(full_path))
            else:
                logger.error('Licence header missing: {0}'.
                             format(full_path))
                with open(reports_dir + "licence-" + project + ".log",
                          "a") as gate_report:
                    gate_report.write('Licence header missing: {0}\n'.
                                      format(full_path))