Implements sha256 exception functionality 49/36449/2
author lhinds <lhinds@redhat.com>
Sun, 25 Jun 2017 17:43:51 +0000 (18:43 +0100)
committer lhinds <lhinds@redhat.com>
Sun, 25 Jun 2017 17:50:26 +0000 (18:50 +0100)
This patch adds functionality for file checksum verification for
binary files.

The binaries directive in master_list.yaml now contains only
simple exceptions (for common artefacts that are gitignored).

Each project_exception file now has a filename and a sha256 hash.

If a binary file has no exception entry, or its hash does not match,
the scan will output the file's sha256 hash for the user to include
in an exception patch.

This functionality has been added to both complete project scans
and patchset scans.

JIRA: RELENG-240

Change-Id: Iafa5710f4a0da192fc74335b1200b504413f8a8b
Signed-off-by: lhinds <lhinds@redhat.com>
anteater/src/get_lists.py
anteater/src/patch_scan.py
anteater/src/project_scan.py
exceptions/sandbox.yaml
master_list.yaml

index 713321f..d7b0c47 100644 (file)
@@ -69,28 +69,23 @@ class GetLists(object):
             self.loaded = True
 
     def binary_list(self, project):
-        project_list = False
-        self.load_project_exception_file(yl.get('project_exceptions'), project)
         try:
             default_list = (yl['binaries']['binary_ignore'])
         except KeyError:
             logger.error('Key Error processing binary list values')
-        try:
-            project_list = (yl['binaries'][project]['binary_ignore'])
-        except KeyError:
-            logger.info('No binary waivers found for {0}'.
-                        format(project))
 
         binary_re = re.compile("|".join(default_list),
-                flags=re.IGNORECASE)
+                               flags=re.IGNORECASE)
+        return binary_re
 
-        if project_list:
-            binary_project_re = re.compile("|".join(project_list),
-                                           flags=re.IGNORECASE)
-            return binary_re, binary_project_re
-        else:
-            binary_project_re = re.compile("")
-            return binary_re, binary_project_re
+    def binary_hash(self, project, patch_file):
+        self.load_project_exception_file(yl.get('project_exceptions'), project)
+        file_name = os.path.basename(patch_file)
+        try:
+            binary_hash = (yl['binaries'][project][file_name])
+        except KeyError:
+            logger.error('Key Error processing binary hash values')
+        return binary_hash
 
     def file_audit_list(self, project):
         project_list = False
index 48c78fd..51b3430 100644 (file)
@@ -21,6 +21,7 @@ from binaryornot.check import is_binary
 import anteater.utils.anteater_logger as antlog
 import anteater.src.get_lists as get_lists
 import ConfigParser
+import hashlib
 import sys
 import re
 
@@ -30,6 +31,7 @@ config = ConfigParser.RawConfigParser()
 config.read('anteater.conf')
 reports_dir = config.get('config', 'reports_dir')
 failure = False
+hasher = hashlib.sha256()
 
 
 def prepare_patchset(project, patchset):
@@ -39,7 +41,7 @@ def prepare_patchset(project, patchset):
     # Get Various Lists / Project Waivers
     lists = get_lists.GetLists()
     # Get binary white list
-    binary_list, binary_project_list = lists.binary_list(project)
+    binary_list = lists.binary_list(project)
 
     # Get file name black list and project waivers
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
@@ -59,7 +61,7 @@ def prepare_patchset(project, patchset):
     for line in lines:
         patch_file = line.strip('\n')
         # Perform binary and file / content checks
-        scan_patch(project, patch_file, binary_list, binary_project_list,
+        scan_patch(project, patch_file, binary_list,
                    file_audit_list, file_audit_project_list,
                    file_content_list, file_content_project_list, licence_ext,
                    licence_ignore)
@@ -69,16 +71,26 @@ def prepare_patchset(project, patchset):
     process_failure()
 
 
-def scan_patch(project, patch_file, binary_list, binary_project_list,
-               file_audit_list, file_audit_project_list, file_content_list,
+def scan_patch(project, patch_file, binary_list, file_audit_list,
+               file_audit_project_list, file_content_list,
                file_content_project_list, licence_ext, licence_ignore):
     """ Scan actions for each commited file in patch set """
     global failure
     if is_binary(patch_file):
-        if not binary_list.search(patch_file) and not binary_project_list\
-                .search(patch_file):
-            logger.error('Non Whitelisted Binary file: {0}'.
-                         format(patch_file))
+        hashlist = get_lists.GetLists()
+        binary_hash = hashlist.binary_hash(project, patch_file)
+        if not binary_list.search(patch_file):
+            with open(patch_file, 'rb') as afile:
+                buf = afile.read()
+                hasher.update(buf)
+            if hasher.hexdigest() in binary_hash:
+                logger.info('Found matching file hash for file: {0}'.
+                            format(patch_file))
+            else:
+                logger.error('Non Whitelisted Binary file: {0}'.
+                             format(patch_file))
+                logger.error('Please submit patch with this hash:: {0}'.
+                             format(hasher.hexdigest()))
             failure = True
             with open(reports_dir + "binaries-" + project + ".log", "a") \
                     as gate_report:
index c7c6f28..15498f1 100644 (file)
@@ -17,6 +17,7 @@
 
 from __future__ import division, print_function, absolute_import
 import ConfigParser
+import hashlib
 import os
 import re
 import anteater.utils.anteater_logger as antlog
@@ -29,6 +30,7 @@ config.read('anteater.conf')
 reports_dir = config.get('config', 'reports_dir')
 master_list = config.get('config', 'master_list')
 ignore_dirs = ['.git']
+hasher = hashlib.sha256()
 
 
 def prepare_project(project, project_dir):
@@ -38,7 +40,7 @@ def prepare_project(project, project_dir):
     lists = get_lists.GetLists()
 
     # Get binary white list
-    binary_list, binary_project_list = lists.binary_list(project)
+    binary_list = lists.binary_list(project)
 
     # Get file name black list and project waivers
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
@@ -51,8 +53,8 @@ def prepare_project(project, project_dir):
     licence_ignore = lists.licence_ignore()
 
     # Perform rudimentary scans
-    scan_file(project_dir, project, binary_list, binary_project_list,
-              file_audit_list, file_audit_project_list, file_content_list,
+    scan_file(project_dir, project, binary_list,file_audit_list,
+              file_audit_project_list, file_content_list,
               project_content_list)
 
     # Perform licence header checks
@@ -60,8 +62,8 @@ def prepare_project(project, project_dir):
     licence_root_check(project_dir, project)
 
 
-def scan_file(project_dir, project, binary_list, binary_project_list,
-              file_audit_list, file_audit_project_list, file_content_list,
+def scan_file(project_dir, project, binary_list, file_audit_list,
+              file_audit_project_list, file_content_list,
               project_content_list):
     """Searches for banned strings and files that are listed """
     for root, dirs, files in os.walk(project_dir):
@@ -114,15 +116,25 @@ def scan_file(project_dir, project, binary_list, binary_project_list,
                                               format(match.group()))
             else:
                 # Check if Binary is whitelisted
-                if not binary_list.search(full_path) \
-                        and not binary_project_list.search(full_path):
-                    logger.error('Non Whitelisted Binary: {0}'.
-                                 format(full_path))
-                    with open(reports_dir + "binaries-" + project + ".log",
-                              "a") \
-                            as gate_report:
-                        gate_report.write('Non Whitelisted Binary: {0}\n'.
-                                          format(full_path))
+                hashlist = get_lists.GetLists()
+                binary_hash = hashlist.binary_hash(project, full_path)
+                if not binary_list.search(full_path):
+                    with open(full_path, 'rb') as afile:
+                        buf = afile.read()
+                        hasher.update(buf)
+                    if hasher.hexdigest() in binary_hash:
+                        logger.info('Found matching file hash for file: {0}'.
+                                    format(full_path))
+                    else:
+                        logger.error('Non Whitelisted Binary file: {0}'.
+                                     format(full_path))
+                        logger.error('Please submit patch with this hash: {0}'.
+                                     format(hasher.hexdigest()))
+                        with open(reports_dir + "binaries-" + project + ".log",
+                                  "a") \
+                                as gate_report:
+                            gate_report.write('Non Whitelisted Binary: {0}\n'.
+                                              format(full_path))
 
 
 def licence_root_check(project_dir, project):
index e41b04a..b0f8d24 100644 (file)
@@ -5,7 +5,9 @@
 # of escaping YAML delimiters too (such as `:`) using double quotes "".
 
 binaries:
-  binary_ignore: [nullvalue]
+  ping:
+    - d0d7dfc73e0fac09d920ebbdf8cd4e0ef623f15d6246ff20d7a6d12c9a48bf41
+
 file_audits:
   file_names: [nullvalue]
   file_contents:
index 309876c..ded5015 100644 (file)
@@ -6,19 +6,7 @@
 
 binaries:
   binary_ignore:
-    - \.DS_Store
-    - \.eot
-    - \.gif
     - \.git/(index|objects)
-    - \.ico
-    - \.idx
-    - \.jp(e?)g
-    - \.otf
-    - \.pack
-    - \.pdf
-    - \.png
-    - \.ttf
-    - \.woff
 
 file_audits:
   file_names: