# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
"""
    Accepts the --path argument and iterates the root directory using os.walk,
    checking whether each file is a binary or contains a blacklisted string.
    If any violations are found, the script adds the violation to a log file.
"""
from __future__ import division, print_function, absolute_import

import hashlib
import logging
import os
import re

import six.moves.configparser
from binaryornot.check import is_binary

from . import get_lists
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Settings are read at import time from 'anteater.conf', which is resolved
# relative to the current working directory.
config = six.moves.configparser.RawConfigParser()
config.read('anteater.conf')

# Prefix for every report file. It is concatenated directly with the report
# file name, so the configured value needs a trailing path separator.
reports_dir = config.get('config', 'reports_dir')
master_list = config.get('config', 'master_list')
# NOTE(review): this reads the 'master_list' option a second time; an
# 'ignore_list' option may have been intended. Confirm against anteater.conf
# before changing the key, since a missing option raises at import time.
ignore_list = config.get('config', 'master_list')

# Directory names pruned from the os.walk traversals in this module.
ignore_dirs = ['.git']

# One SHA-256 object for the whole module. Every update() call feeds the same
# running digest, so hexdigest() reflects all data hashed since import, not
# just the most recent file.
hasher = hashlib.sha256()
def prepare_project(project, project_dir):
    """Generate blacklists / whitelists and call the main scan functions.

    Args:
        project: Project name; passed to every list lookup and on to the
            scan functions.
        project_dir: Root directory of the project to scan.
    """
    # Get Various Lists / Project Waivers
    lists = get_lists.GetLists()

    # Get binary white list
    binary_list = lists.binary_list(project)

    # Get file name black list and project waivers
    file_audit_list, file_audit_project_list = lists.file_audit_list(project)

    # Get file content black list and project waivers
    master_list, ignore_list = lists.file_content_list(project)

    # Get File Ignore Lists
    file_ignore = lists.file_ignore()

    # Get licence extension and licence ignore lists
    licence_ext = lists.licence_extensions()
    licence_ignore = lists.licence_ignore()

    # Perform rudimentary scans
    scan_file(project_dir, project, binary_list, file_audit_list,
              file_audit_project_list, master_list, ignore_list,
              file_ignore)

    # Perform licence header checks
    licence_check(licence_ext, licence_ignore, project, project_dir)
    licence_root_check(project_dir, project)
def _audit_file_name(project, full_path, file_audit_list,
                     file_audit_project_list):
    """Log and report ``full_path`` if its name is blacklisted, not waived."""
    match = file_audit_list.search(full_path)
    if not match or file_audit_project_list.search(full_path):
        return

    logger.error('Blacklisted filename: %s', full_path)
    logger.error('Matched String: %s', match.group())
    with open(reports_dir + "file-names_" + project + ".log",
              "a") as gate_report:
        gate_report.write('Blacklisted filename: {0}\n'.format(full_path))
        # NOTE(review): no trailing newline, so the next report entry starts
        # on this same line. Left unchanged in case the format is relied on.
        gate_report.write('Matched String: {0}'.format(match.group()))


def _audit_binary(project, full_path, binary_hash):
    """Log and report a binary whose SHA-256 digest is not in binary_hash."""
    # Use a fresh hash object for each file. A hash object shared between
    # files keeps accumulating input, so its digest would cover every file
    # hashed so far instead of this one alone.
    file_hasher = hashlib.sha256()
    with open(full_path, 'rb') as afile:
        for chunk in iter(lambda: afile.read(65536), b''):
            file_hasher.update(chunk)
    digest = file_hasher.hexdigest()

    if digest in binary_hash:
        logger.info('Found matching file hash for file: %s', full_path)
        return

    logger.error('Non Whitelisted Binary file: %s', full_path)
    logger.error('Please submit patch with this hash: %s', digest)
    with open(reports_dir + "binaries-" + project + ".log",
              "a") as gate_report:
        gate_report.write('Non Whitelisted Binary: {0}\n'.format(full_path))
        gate_report.write(
            'Submit patch with the following hash: {0}\n'.format(digest))


def _audit_contents(project, full_path, master_list, ignore_list):
    """Log and report each line of ``full_path`` matching a banned regex."""
    try:
        with open(full_path, 'r') as fo:
            lines = fo.readlines()
    except IOError:
        logger.error('%s does not exist', full_path)
        # Nothing was read; skip the scan rather than reuse stale data.
        return
    except UnicodeDecodeError:
        # Files that reach this point are not guaranteed to be text.
        logger.error('%s could not be decoded as text', full_path)
        return

    report_path = reports_dir + "contents-" + project + ".log"
    for line in lines:
        # Check for sensitive content in project files
        for value in master_list.values():
            regex = value['regex']
            # NOTE(review): the 'desc' key name is reconstructed from how
            # the value is used below; confirm against the master list.
            desc = value['desc']
            if re.search(regex, line) and not re.search(ignore_list, line):
                logger.error('File contains violation: %s', full_path)
                logger.error('Flagged Content: %s', line.rstrip())
                logger.error('Matched Regular Exp: %s', regex)
                logger.error('Rationale: %s', desc.rstrip())
                with open(report_path, "a") as gate_report:
                    gate_report.write(
                        'File contains violation: {0}\n'.format(full_path))
                    # The flagged line keeps its own line ending.
                    gate_report.write('Flagged Content: {0}'.format(line))
                    # NOTE(review): no trailing newline, so 'Rationale'
                    # follows on the same report line.
                    gate_report.write(
                        'Matched Regular Exp: {0}'.format(regex))
                    gate_report.write(
                        'Rationale: {0}\n'.format(desc.rstrip()))


def scan_file(project_dir, project, binary_list, file_audit_list,
              file_audit_project_list, master_list, ignore_list,
              file_ignore):
    """Search a project tree for banned file names, binaries and strings.

    Walks ``project_dir`` and, for every file, runs the file-name check and
    then either the binary check or the content check. Violations are logged
    and appended to per-project report files under ``reports_dir``.

    Args:
        project_dir: Root directory to walk.
        project: Project name; used in report file names and hash lookups.
        binary_list: Object exposing ``search(path)``; a match whitelists a
            binary by path.
        file_audit_list: Object exposing ``search(path)``; a match marks the
            file name as blacklisted.
        file_audit_project_list: Object exposing ``search(path)``; a match
            waives a blacklisted file name for this project.
        master_list: Mapping whose values are dicts holding a 'regex' entry
            (and a description entry) for each banned content rule.
        ignore_list: Pattern handed to ``re.search``; matching lines are
            waived from the content check.
        file_ignore: Iterable of file-name suffixes excluded from the
            content check.
    """
    for root, dirs, files in os.walk(project_dir):
        # Filter out ignored directories from list. The slice assignment
        # edits the list os.walk is iterating, so they are never entered.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        for items in files:
            full_path = os.path.join(root, items)

            # Check for Blacklisted file names
            _audit_file_name(project, full_path, file_audit_list,
                             file_audit_project_list)

            # Check if Binary is whitelisted
            hashlist = get_lists.GetLists()
            binary_hash = hashlist.binary_hash(project, full_path)

            if is_binary(full_path) and not binary_list.search(full_path):
                _audit_binary(project, full_path, binary_hash)
            elif not items.endswith(tuple(file_ignore)):
                _audit_contents(project, full_path, master_list, ignore_list)
def licence_root_check(project_dir, project):
    """Check that a LICENSE file exists directly under ``project_dir``.

    Logs the outcome; when the file is missing, also appends an entry to the
    project's licence report under ``reports_dir``.

    Args:
        project_dir: Root directory of the project being checked.
        project: Project name; used in the report file name.
    """
    if os.path.isfile(os.path.join(project_dir, 'LICENSE')):
        logger.info('LICENSE file present in: %s', project_dir)
        return

    logger.error('LICENSE file missing in: %s', project_dir)
    with open(reports_dir + "licence-" + project + ".log",
              "a") as gate_report:
        gate_report.write('LICENSE file missing in: {0}\n'.
                          format(project_dir))
169 def licence_check(licence_ext, licence_ignore, project, project_dir):
    """ Perform basic checks for the presence of licence strings """
171 for root, dirs, files in os.walk(project_dir):
172 dirs[:] = [d for d in dirs if d not in ignore_dirs]
174 if file.endswith(tuple(licence_ext)) \
175 and file not in licence_ignore:
176 full_path = os.path.join(root, file)
177 if not is_binary(full_path):
178 fo = open(full_path, 'r')
180 # Note: Hardcoded use of 'copyright' & 'spdx' is the result
181 # of a decision made at 2017 plugfest to limit searches to
182 # just these two strings.
183 patterns = ['copyright', 'spdx',
184 'http://creativecommons.org/licenses/by/4.0']
185 if any(i in content.lower() for i in patterns):
186 logger.info('Licence string present: %s', full_path)
188 logger.error('Licence header missing: %s', full_path)
189 with open(reports_dir + "licence-" + project + ".log",
192 gate_report.write('Licence header missing: {0}\n'.