2 # -*- coding: utf-8 -*-
3 ##############################################################################
4 # Copyright (c) 2017 Luke Hinds <lhinds@redhat.com>, Red Hat
6 # All rights reserved. This program and the accompanying materials
7 # are made available under the terms of the Apache License, Version 2.0
8 # which accompanies this distribution, and is available at
9 # http://www.apache.org/licenses/LICENSE-2.0
10 ##############################################################################
Accepts the --path argument and iterates the root directory using os.walk,
checking whether each file is a binary or contains a blacklisted string.
If any violations are found, the script adds the violation to a log file.
18 from __future__ import division, print_function, absolute_import
22 import anteater.utils.anteater_logger as antlog
23 import anteater.src.get_lists as get_lists
24 from binaryornot.check import is_binary
# Module-level logger used by every scan in this file.
logger = antlog.Logger(__name__).getLogger()

# Settings are read once, at import time, from 'anteater.conf'. The path is
# relative, so it is resolved against the current working directory.
config = ConfigParser.RawConfigParser()
config.read('anteater.conf')
gate_checks = config.get('config', 'gate_checks')
reports_dir = config.get('config', 'reports_dir')

# Directory names pruned from every os.walk traversal below.
ignore_dirs = ['.git']
def prepare_project(project, project_dir):
    """Collect the project's black/white lists, then run every audit.

    Args:
        project: project name; handed to each list lookup and to both scans.
        project_dir: root directory of the tree to be scanned.
    """
    list_source = get_lists.GetLists()

    # Each lookup returns a pair: the general list and its project-level
    # counterpart (project waivers).
    bin_whitelist, bin_project_whitelist = list_source.binary_list(project)
    name_blacklist, name_waivers = list_source.file_audit_list(project)
    content_blacklist, content_waivers = \
        list_source.file_content_list(project)

    # Licence lists: extensions to inspect and file names to skip.
    extensions = list_source.licence_extensions()
    skipped_names = list_source.licence_ignore()

    # Rudimentary scans: file names, file contents and binaries.
    scan_file(project_dir=project_dir,
              project=project,
              binary_list=bin_whitelist,
              binary_project_list=bin_project_whitelist,
              file_audit_list=name_blacklist,
              file_audit_project_list=name_waivers,
              file_content_list=content_blacklist,
              project_content_list=content_waivers)

    # Licence header checks.
    licence_check(licence_ext=extensions,
                  licence_ignore=skipped_names,
                  project=project,
                  project_dir=project_dir)
def scan_file(project_dir, project, binary_list, binary_project_list,
              file_audit_list, file_audit_project_list, file_content_list,
              project_content_list):
    """Search a project tree for banned file names, content and binaries.

    Walks ``project_dir`` (skipping directories named in ``ignore_dirs``)
    and, for every file:

    * logs and reports the path when it matches ``file_audit_list`` and is
      not waived by ``file_audit_project_list``;
    * for non-binary files, logs and reports every line that matches
      ``file_content_list`` and is not waived by ``project_content_list``;
    * for binary files, logs and reports the path unless it matches
      ``binary_list`` or ``binary_project_list``.

    Violations are logged through ``logger.error`` and appended to
    per-project report files under ``reports_dir``.

    Args:
        project_dir: root directory to walk.
        project: project name, used to build the report file names.
        binary_list, binary_project_list: objects exposing ``search(path)``
            that whitelist binaries.
        file_audit_list, file_audit_project_list: objects exposing
            ``search(path)``; blacklist and waiver for file names.
        file_content_list, project_content_list: objects exposing
            ``search(line)``; blacklist and waiver for file content.
    """
    # Report paths do not change during the walk, so build them once.
    names_report = reports_dir + "file-names_" + project + ".log"
    contents_report = reports_dir + "contents-" + project + ".log"
    binaries_report = reports_dir + "binaries-" + project + ".log"

    for root, dirs, files in os.walk(project_dir):
        # Prune in place so os.walk never descends into ignored directories.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]

        for file_name in files:
            full_path = os.path.join(root, file_name)

            # Check for blacklisted file names (search once, reuse match).
            match = file_audit_list.search(full_path)
            if match and not file_audit_project_list.search(full_path):
                logger.error('Blacklisted filename: {0}'.format(full_path))
                logger.error('Matched String: {0}'.format(match.group()))
                with open(names_report, "a") as gate_report:
                    gate_report.write(
                        'Blacklisted filename: {0}\n'.format(full_path))
                    # Terminate the record so the next entry starts on its
                    # own line.
                    gate_report.write(
                        'Matched String: {0}\n'.format(match.group()))

            if is_binary(full_path):
                # Check if the binary is whitelisted.
                if not binary_list.search(full_path) \
                        and not binary_project_list.search(full_path):
                    logger.error(
                        'Non Whitelisted Binary: {0}'.format(full_path))
                    with open(binaries_report, "a") as gate_report:
                        gate_report.write(
                            'Non Whitelisted Binary: {0}\n'.format(full_path))
                continue

            # Read the whole file up front and close the handle before any
            # report is appended; the scanned file is never left open.
            with open(full_path, 'r') as source_file:
                lines = source_file.readlines()

            for line in lines:
                # Check for sensitive content in project files.
                match = file_content_list.search(line)
                if not match or project_content_list.search(line):
                    continue
                logger.error('File contains violation: {0}'.format(full_path))
                logger.error('Flagged Content: {0}'.format(line.rstrip()))
                logger.error('Matched String: {0}'.format(match.group()))
                # A final line may lack its newline; add one so the record
                # stays on separate lines in the report.
                flagged = line if line.endswith('\n') else line + '\n'
                with open(contents_report, "a") as gate_report:
                    gate_report.write(
                        'File contains violation: {0}\n'.format(full_path))
                    gate_report.write('Flagged Content: {0}'.format(flagged))
                    gate_report.write(
                        'Matched String: {0}\n'.format(match.group()))
def licence_check(licence_ext, licence_ignore, project, project_dir):
    """Perform basic checks for the presence of licence strings.

    Walks ``project_dir`` (skipping directories named in ``ignore_dirs``).
    Every non-binary file whose name ends with one of ``licence_ext`` and is
    not listed in ``licence_ignore`` is searched, case-insensitively, for
    'copyright' or 'spdx'. Files containing neither are logged as errors and
    appended to the project's licence report under ``reports_dir``.

    Args:
        licence_ext: iterable of file-name suffixes to inspect.
        licence_ignore: container of file names exempt from the check.
        project: project name, used to build the report file name.
        project_dir: root directory to walk.
    """
    # str.endswith needs a tuple; build it once, not once per file.
    extensions = tuple(licence_ext)
    report_path = reports_dir + "licence-" + project + ".log"

    for root, dirs, files in os.walk(project_dir):
        # Prune in place so os.walk never descends into ignored directories.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]

        for file_name in files:
            if not file_name.endswith(extensions) \
                    or file_name in licence_ignore:
                continue

            full_path = os.path.join(root, file_name)
            if is_binary(full_path):
                continue

            # The context manager closes the handle once the content is read.
            with open(full_path, 'r') as source_file:
                content = source_file.read()

            # Note: Hardcoded use of 'copyright' & 'spdx' is the result
            # of a decision made at 2017 plugfest to limit searches to
            # just these two strings.
            if re.search("copyright|spdx", content, re.IGNORECASE):
                logger.info('Licence string present: {0}'.format(full_path))
            else:
                logger.error('Licence header missing: {0}'.format(full_path))
                with open(report_path, "a") as gate_report:
                    gate_report.write(
                        'Licence header missing: {0}\n'.format(full_path))