From 48ce2a2317eb86b73b6da297c8ed9082b06faf9f Mon Sep 17 00:00:00 2001 From: lhinds Date: Sun, 25 Jun 2017 18:43:51 +0100 Subject: [PATCH 1/1] Implements sha256 exception functionality This patch adds functionality for file checksum verifications for binary files. The master_list.yaml binaries yaml directive now only contains simple exceptions (for common artefacts that are gitignored). Each project_exception file now has a filename and a sha256 hash. If a binary file is not found, or the hash is mismatched, it will output the hash for the user to include in an exception patch. This functionality has been added to complete project scans and patchset scans JIRA: RELENG-240 Change-Id: Iafa5710f4a0da192fc74335b1200b504413f8a8b Signed-off-by: lhinds --- anteater/src/get_lists.py | 25 ++++++++++--------------- anteater/src/patch_scan.py | 28 ++++++++++++++++++++-------- anteater/src/project_scan.py | 40 ++++++++++++++++++++++++++-------------- exceptions/sandbox.yaml | 4 +++- master_list.yaml | 12 ------------ 5 files changed, 59 insertions(+), 50 deletions(-) diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py index 713321f..d7b0c47 100644 --- a/anteater/src/get_lists.py +++ b/anteater/src/get_lists.py @@ -69,28 +69,23 @@ class GetLists(object): self.loaded = True def binary_list(self, project): - project_list = False - self.load_project_exception_file(yl.get('project_exceptions'), project) try: default_list = (yl['binaries']['binary_ignore']) except KeyError: logger.error('Key Error processing binary list values') - try: - project_list = (yl['binaries'][project]['binary_ignore']) - except KeyError: - logger.info('No binary waivers found for {0}'. 
- format(project)) binary_re = re.compile("|".join(default_list), - flags=re.IGNORECASE) + flags=re.IGNORECASE) + return binary_re - if project_list: - binary_project_re = re.compile("|".join(project_list), - flags=re.IGNORECASE) - return binary_re, binary_project_re - else: - binary_project_re = re.compile("") - return binary_re, binary_project_re + def binary_hash(self, project, patch_file): + self.load_project_exception_file(yl.get('project_exceptions'), project) + file_name = os.path.basename(patch_file) + try: + binary_hash = (yl['binaries'][project][file_name]) + except KeyError: + logger.error('Key Error processing binary hash values') + return binary_hash def file_audit_list(self, project): project_list = False diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py index 48c78fd..51b3430 100644 --- a/anteater/src/patch_scan.py +++ b/anteater/src/patch_scan.py @@ -21,6 +21,7 @@ from binaryornot.check import is_binary import anteater.utils.anteater_logger as antlog import anteater.src.get_lists as get_lists import ConfigParser +import hashlib import sys import re @@ -30,6 +31,7 @@ config = ConfigParser.RawConfigParser() config.read('anteater.conf') reports_dir = config.get('config', 'reports_dir') failure = False +hasher = hashlib.sha256() def prepare_patchset(project, patchset): @@ -39,7 +41,7 @@ def prepare_patchset(project, patchset): # Get Various Lists / Project Waivers lists = get_lists.GetLists() # Get binary white list - binary_list, binary_project_list = lists.binary_list(project) + binary_list = lists.binary_list(project) # Get file name black list and project waivers file_audit_list, file_audit_project_list = lists.file_audit_list(project) @@ -59,7 +61,7 @@ def prepare_patchset(project, patchset): for line in lines: patch_file = line.strip('\n') # Perform binary and file / content checks - scan_patch(project, patch_file, binary_list, binary_project_list, + scan_patch(project, patch_file, binary_list, file_audit_list, 
file_audit_project_list, file_content_list, file_content_project_list, licence_ext, licence_ignore) @@ -69,16 +71,26 @@ def prepare_patchset(project, patchset): process_failure() -def scan_patch(project, patch_file, binary_list, binary_project_list, - file_audit_list, file_audit_project_list, file_content_list, +def scan_patch(project, patch_file, binary_list, file_audit_list, + file_audit_project_list, file_content_list, file_content_project_list, licence_ext, licence_ignore): """ Scan actions for each commited file in patch set """ global failure if is_binary(patch_file): - if not binary_list.search(patch_file) and not binary_project_list\ - .search(patch_file): - logger.error('Non Whitelisted Binary file: {0}'. - format(patch_file)) + hashlist = get_lists.GetLists() + binary_hash = hashlist.binary_hash(project, patch_file) + if not binary_list.search(patch_file): + with open(patch_file, 'rb') as afile: + buf = afile.read() + hasher.update(buf) + if hasher.hexdigest() in binary_hash: + logger.info('Found matching file hash for file: {0}'. + format(patch_file)) + else: + logger.error('Non Whitelisted Binary file: {0}'. + format(patch_file)) + logger.error('Please submit patch with this hash:: {0}'. 
+ format(hasher.hexdigest())) failure = True with open(reports_dir + "binaries-" + project + ".log", "a") \ as gate_report: diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py index c7c6f28..15498f1 100644 --- a/anteater/src/project_scan.py +++ b/anteater/src/project_scan.py @@ -17,6 +17,7 @@ from __future__ import division, print_function, absolute_import import ConfigParser +import hashlib import os import re import anteater.utils.anteater_logger as antlog @@ -29,6 +30,7 @@ config.read('anteater.conf') reports_dir = config.get('config', 'reports_dir') master_list = config.get('config', 'master_list') ignore_dirs = ['.git'] +hasher = hashlib.sha256() def prepare_project(project, project_dir): @@ -38,7 +40,7 @@ def prepare_project(project, project_dir): lists = get_lists.GetLists() # Get binary white list - binary_list, binary_project_list = lists.binary_list(project) + binary_list = lists.binary_list(project) # Get file name black list and project waivers file_audit_list, file_audit_project_list = lists.file_audit_list(project) @@ -51,8 +53,8 @@ def prepare_project(project, project_dir): licence_ignore = lists.licence_ignore() # Perform rudimentary scans - scan_file(project_dir, project, binary_list, binary_project_list, - file_audit_list, file_audit_project_list, file_content_list, + scan_file(project_dir, project, binary_list,file_audit_list, + file_audit_project_list, file_content_list, project_content_list) # Perform licence header checks @@ -60,8 +62,8 @@ def prepare_project(project, project_dir): licence_root_check(project_dir, project) -def scan_file(project_dir, project, binary_list, binary_project_list, - file_audit_list, file_audit_project_list, file_content_list, +def scan_file(project_dir, project, binary_list, file_audit_list, + file_audit_project_list, file_content_list, project_content_list): """Searches for banned strings and files that are listed """ for root, dirs, files in os.walk(project_dir): @@ -114,15 +116,25 @@ def 
scan_file(project_dir, project, binary_list, binary_project_list, format(match.group())) else: # Check if Binary is whitelisted - if not binary_list.search(full_path) \ - and not binary_project_list.search(full_path): - logger.error('Non Whitelisted Binary: {0}'. - format(full_path)) - with open(reports_dir + "binaries-" + project + ".log", - "a") \ - as gate_report: - gate_report.write('Non Whitelisted Binary: {0}\n'. - format(full_path)) + hashlist = get_lists.GetLists() + binary_hash = hashlist.binary_hash(project, full_path) + if not binary_list.search(full_path): + with open(full_path, 'rb') as afile: + buf = afile.read() + hasher.update(buf) + if hasher.hexdigest() in binary_hash: + logger.info('Found matching file hash for file: {0}'. + format(full_path)) + else: + logger.error('Non Whitelisted Binary file: {0}'. + format(full_path)) + logger.error('Please submit patch with this hash: {0}'. + format(hasher.hexdigest())) + with open(reports_dir + "binaries-" + project + ".log", + "a") \ + as gate_report: + gate_report.write('Non Whitelisted Binary: {0}\n'. + format(full_path)) def licence_root_check(project_dir, project): diff --git a/exceptions/sandbox.yaml b/exceptions/sandbox.yaml index e41b04a..b0f8d24 100644 --- a/exceptions/sandbox.yaml +++ b/exceptions/sandbox.yaml @@ -5,7 +5,9 @@ # of escaping YAML delimiters too (such as `:`) using double quotes "". binaries: - binary_ignore: [nullvalue] + ping: + - d0d7dfc73e0fac09d920ebbdf8cd4e0ef623f15d6246ff20d7a6d12c9a48bf41 + file_audits: file_names: [nullvalue] file_contents: diff --git a/master_list.yaml b/master_list.yaml index 309876c..ded5015 100644 --- a/master_list.yaml +++ b/master_list.yaml @@ -6,19 +6,7 @@ binaries: binary_ignore: - - \.DS_Store - - \.eot - - \.gif - \.git/(index|objects) - - \.ico - - \.idx - - \.jp(e?)g - - \.otf - - \.pack - - \.pdf - - \.png - - \.ttf - - \.woff file_audits: file_names: -- 2.16.6