Fixed regex to streamline wildcards
[releng-anteater.git] / anteater / src / patch_scan.py
index 48c78fd..0a32f3e 100644 (file)
 
 from __future__ import division, print_function, absolute_import
 from binaryornot.check import is_binary
-import anteater.utils.anteater_logger as antlog
-import anteater.src.get_lists as get_lists
-import ConfigParser
+import logging
+import hashlib
+import six.moves.configparser
 import sys
 import re
 
+from . import get_lists
 
-logger = antlog.Logger(__name__).getLogger()
-config = ConfigParser.RawConfigParser()
+logger = logging.getLogger(__name__)
+config = six.moves.configparser.RawConfigParser()
 config.read('anteater.conf')
 reports_dir = config.get('config', 'reports_dir')
 failure = False
+hasher = hashlib.sha256()
 
 
 def prepare_patchset(project, patchset):
@@ -39,7 +41,7 @@ def prepare_patchset(project, patchset):
     # Get Various Lists / Project Waivers
     lists = get_lists.GetLists()
     # Get binary white list
-    binary_list, binary_project_list = lists.binary_list(project)
+    binary_list = lists.binary_list(project)
 
     # Get file name black list and project waivers
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
@@ -59,7 +61,7 @@ def prepare_patchset(project, patchset):
     for line in lines:
         patch_file = line.strip('\n')
         # Perform binary and file / content checks
-        scan_patch(project, patch_file, binary_list, binary_project_list,
+        scan_patch(project, patch_file, binary_list,
                    file_audit_list, file_audit_project_list,
                    file_content_list, file_content_project_list, licence_ext,
                    licence_ignore)
@@ -69,16 +71,26 @@ def prepare_patchset(project, patchset):
     process_failure()
 
 
-def scan_patch(project, patch_file, binary_list, binary_project_list,
-               file_audit_list, file_audit_project_list, file_content_list,
+def scan_patch(project, patch_file, binary_list, file_audit_list,
+               file_audit_project_list, file_content_list,
                file_content_project_list, licence_ext, licence_ignore):
     """ Scan actions for each commited file in patch set """
     global failure
     if is_binary(patch_file):
-        if not binary_list.search(patch_file) and not binary_project_list\
-                .search(patch_file):
-            logger.error('Non Whitelisted Binary file: {0}'.
-                         format(patch_file))
+        hashlist = get_lists.GetLists()
+        binary_hash = hashlist.binary_hash(project, patch_file)
+        if not binary_list.search(patch_file):
+            with open(patch_file, 'rb') as afile:
+                buf = afile.read()
+                hasher = hashlib.sha256(buf)  # fresh digest per file
+            if hasher.hexdigest() in binary_hash:
+                logger.info('Found matching file hash for file: %s',
+                            patch_file)
+            else:
+                logger.error('Non Whitelisted Binary file: %s',
+                             patch_file)
+                logger.error('Submit patch with the following hash: %s',
+                             hasher.hexdigest())
             failure = True
             with open(reports_dir + "binaries-" + project + ".log", "a") \
                     as gate_report:
@@ -89,10 +101,8 @@ def scan_patch(project, patch_file, binary_list, binary_project_list,
         if file_audit_list.search(patch_file) and not \
                     file_audit_project_list.search(patch_file):
             match = file_audit_list.search(patch_file)
-            logger.error('Blacklisted file: {0}'.
-                         format(patch_file))
-            logger.error('Matched String: {0}'.
-                         format(match.group()))
+            logger.error('Blacklisted file: %s', patch_file)
+            logger.error('Matched String: %s', match.group())
             failure = True
             with open(reports_dir + "file-names_" + project + ".log", "a") \
                     as gate_report:
@@ -109,12 +119,9 @@ def scan_patch(project, patch_file, binary_list, binary_project_list,
             if file_content_list.search(line) and not \
                     file_content_project_list.search(line):
                 match = file_content_list.search(line)
-                logger.error('File contains violation: {0}'.
-                             format(patch_file))
-                logger.error('Flagged Content: {0}'.
-                             format(line.rstrip()))
-                logger.error('Matched String: {0}'.
-                             format(match.group()))
+                logger.error('File contains violation: %s', patch_file)
+                logger.error('Flagged Content: %s', line.rstrip())
+                logger.error('Matched String: %s', match.group())
                 failure = True
                 with open(reports_dir + "contents_" + project + ".log",
                           "a") as gate_report:
@@ -141,14 +148,11 @@ def licence_check(project, licence_ext,
         # of a decision made at 2017 plugfest to limit searches to
         # just these two strings.
         if re.search("copyright", content, re.IGNORECASE):
-            logger.info('Contains needed Licence string: {0}'.
-                        format(patch_file))
+            logger.info('Contains needed Licence string: %s', patch_file)
         elif re.search("spdx", content, re.IGNORECASE):
-            logger.info('Contains needed Licence string: {0}'.
-                        format(patch_file))
+            logger.info('Contains needed Licence string: %s', patch_file)
         else:
-            logger.error('Licence header missing in file: {0}'.
-                         format(patch_file))
+            logger.error('Licence header missing in file: %s', patch_file)
             failure = True
             with open(reports_dir + "licence-" + project + ".log", "a") \
                     as gate_report: