X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=anteater%2Fsrc%2Fproject_scan.py;h=3a04a8359b60c3917411551762122f42f192b42a;hb=8365245c9dc2f8e2ff2adf8aa84162e420747132;hp=15498f14ab1cb93d406100e8796655c6013056b8;hpb=5ab9c733f380b2a327e64fcfa760d28639994982;p=releng-anteater.git diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py index 15498f1..3a04a83 100644 --- a/anteater/src/project_scan.py +++ b/anteater/src/project_scan.py @@ -16,19 +16,21 @@ """ from __future__ import division, print_function, absolute_import -import ConfigParser import hashlib +import six.moves.configparser import os import re -import anteater.utils.anteater_logger as antlog -import anteater.src.get_lists as get_lists +import logging from binaryornot.check import is_binary -logger = antlog.Logger(__name__).getLogger() -config = ConfigParser.RawConfigParser() +from . import get_lists + +logger = logging.getLogger(__name__) +config = six.moves.configparser.RawConfigParser() config.read('anteater.conf') reports_dir = config.get('config', 'reports_dir') master_list = config.get('config', 'master_list') +ignore_list = config.get('config', 'master_list') ignore_dirs = ['.git'] hasher = hashlib.sha256() @@ -46,7 +48,10 @@ def prepare_project(project, project_dir): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, project_content_list = lists.file_content_list(project) + master_list, ignore_list = lists.file_content_list(project) + + # Get File Ignore Lists + file_ignore = lists.file_ignore() # Get Licence Lists licence_ext = lists.licence_extensions() @@ -54,8 +59,8 @@ def prepare_project(project, project_dir): # Perform rudimentary scans scan_file(project_dir, project, binary_list,file_audit_list, - file_audit_project_list, file_content_list, - project_content_list) + file_audit_project_list, master_list, ignore_list, + file_ignore) # Perform licence header checks licence_check(licence_ext, licence_ignore, project, project_dir) @@ -63,8 +68,8 @@ def prepare_project(project, project_dir): def scan_file(project_dir, project, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - project_content_list): + file_audit_project_list, master_list, ignore_list, + file_ignore): """Searches for banned strings and files that are listed """ for root, dirs, files in os.walk(project_dir): # Filter out ignored directories from list. @@ -75,10 +80,8 @@ def scan_file(project_dir, project, binary_list, file_audit_list, if file_audit_list.search(full_path) and not \ file_audit_project_list.search(full_path): match = file_audit_list.search(full_path) - logger.error('Blacklisted filename: {0}'. - format(full_path)) - logger.error('Matched String: {0}'. - format(match.group())) + logger.error('Blacklisted filename: %s', full_path) + logger.error('Matched String: %s', match.group()) with open(reports_dir + "file-names_" + project + ".log", "a") as gate_report: gate_report. \ @@ -88,23 +91,54 @@ def scan_file(project_dir, project, binary_list, file_audit_list, write('Matched String: {0}'. format(match.group())) - if not is_binary(full_path): - fo = open(full_path, 'r') - lines = fo.readlines() - for line in lines: - # Check for sensitive content in project files - if file_content_list.search(line) and not \ - project_content_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: {0}'. - format(full_path)) - logger.error('Flagged Content: {0}'. - format(line.rstrip())) - logger.error('Matched String: {0}'. - format(match.group())) - with open(reports_dir + "contents-" + project + ".log", - "a") \ - as gate_report: + # Check if Binary is whitelisted + hashlist = get_lists.GetLists() + binary_hash = hashlist.binary_hash(project, full_path) + + if is_binary(full_path) and not binary_list.search(full_path): + with open(full_path, 'rb') as afile: + buf = afile.read() + hasher.update(buf) + if hasher.hexdigest() in binary_hash: + logger.info('Found matching file hash for file: %s', + full_path) + else: + logger.error('Non Whitelisted Binary file: %s', + full_path) + logger.error('Please submit patch with this hash: %s', + hasher.hexdigest()) + with open(reports_dir + "binaries-" + project + ".log", + "a") as gate_report: + gate_report.write('Non Whitelisted Binary: {0}\n'. + format(full_path)) + gate_report.write( + 'Submit patch with the following hash: {0}\n'. + format(hasher.hexdigest())) + + else: + if not items.endswith(tuple(file_ignore)): + try: + fo = open(full_path, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', full_path) + + for line in lines: + # Check for sensitive content in project files + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search( + ignore_list, line): + logger.error('File contains violation: %s', + full_path) + logger.error('Flagged Content: %s', + line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) + with open(reports_dir + "contents-" + project + + ".log", "a") \ + as gate_report: gate_report. \ write('File contains violation: {0}\n'. format(full_path)) @@ -112,38 +146,19 @@ def scan_file(project_dir, project, binary_list, file_audit_list, write('Flagged Content: {0}'. format(line)) gate_report. \ - write('Matched String: {0}\n'. - format(match.group())) - else: - # Check if Binary is whitelisted - hashlist = get_lists.GetLists() - binary_hash = hashlist.binary_hash(project, full_path) - if not binary_list.search(full_path): - with open(full_path, 'rb') as afile: - buf = afile.read() - hasher.update(buf) - if hasher.hexdigest() in binary_hash: - logger.info('Found matching file hash for file: {0}'. - format(full_path)) - else: - logger.error('Non Whitelisted Binary file: {0}'. - format(full_path)) - logger.error('Please submit patch with this hash: {0}'. - format(hasher.hexdigest())) - with open(reports_dir + "binaries-" + project + ".log", - "a") \ - as gate_report: - gate_report.write('Non Whitelisted Binary: {0}\n'. - format(full_path)) + write('Matched Regular Exp: {0}'. + format(regex)) + gate_report. \ + write('Rationale: {0}\n'. + format(desc.rstrip())) + def licence_root_check(project_dir, project): if os.path.isfile(project_dir + '/LICENSE'): - logger.info('LICENSE file present in: {0}'. - format(project_dir)) + logger.info('LICENSE file present in: %s', project_dir) else: - logger.error('LICENSE file missing in: {0}'. - format(project_dir)) + logger.error('LICENSE file missing in: %s', project_dir) with open(reports_dir + "licence-" + project + ".log", "a") \ as gate_report: @@ -165,15 +180,12 @@ def licence_check(licence_ext, licence_ignore, project, project_dir): # Note: Hardcoded use of 'copyright' & 'spdx' is the result # of a decision made at 2017 plugfest to limit searches to # just these two strings. - if re.search("copyright", content, re.IGNORECASE): - logger.info('Licence string present: {0}'. - format(full_path)) - elif re.search("spdx", content, re.IGNORECASE): - logger.info('Licence string present: {0}'. - format(full_path)) + patterns = ['copyright', 'spdx', + 'http://creativecommons.org/licenses/by/4.0'] + if any(i in content.lower() for i in patterns): + logger.info('Licence string present: %s', full_path) else: - logger.error('Licence header missing: {0}'. - format(full_path)) + logger.error('Licence header missing: %s', full_path) with open(reports_dir + "licence-" + project + ".log", "a") \ as gate_report: