Enter sha256 binary sum in report files
[releng-anteater.git] / anteater / src / patch_scan.py
index 0a32f3e..3097d00 100644 (file)
@@ -47,24 +47,30 @@ def prepare_patchset(project, patchset):
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
 
     # Get file content black list and project waivers
-    file_content_list, \
-        file_content_project_list = lists.file_content_list(project)
+    master_list, ignore_list = lists.file_content_list(project)
+
+    # Get File Ignore Lists
+    file_ignore = lists.file_ignore()
 
     # Get Licence Lists
     licence_ext = lists.licence_extensions()
     licence_ignore = lists.licence_ignore()
 
     # Open patch set to get file list
-    fo = open(patchset, 'r')
-    lines = fo.readlines()
+    try:
+        fo = open(patchset, 'r')
+        lines = fo.readlines()
+    except IOError:
+        logger.error('%s does not exist', patchset)
+        sys.exit(1)
 
     for line in lines:
         patch_file = line.strip('\n')
         # Perform binary and file / content checks
         scan_patch(project, patch_file, binary_list,
                    file_audit_list, file_audit_project_list,
-                   file_content_list, file_content_project_list, licence_ext,
-                   licence_ignore)
+                   master_list, ignore_list, licence_ext,
+                   file_ignore, licence_ignore)
 
     # Process each file in patch set using waivers generated above
     # Process final result
@@ -72,8 +78,8 @@ def prepare_patchset(project, patchset):
 
 
 def scan_patch(project, patch_file, binary_list, file_audit_list,
-               file_audit_project_list, file_content_list,
-               file_content_project_list, licence_ext, licence_ignore):
+               file_audit_project_list, master_list,
+               ignore_list, licence_ext, file_ignore, licence_ignore):
     """ Scan actions for each commited file in patch set """
     global failure
     if is_binary(patch_file):
@@ -96,6 +102,9 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
                     as gate_report:
                 gate_report.write('Non Whitelisted Binary file: {0}\n'.
                                   format(patch_file))
+                gate_report.write('Submit patch with the following hash: {0}\n'.
+                                  format(hasher.hexdigest()))
+
     else:
         # Check file names / extensions
         if file_audit_list.search(patch_file) and not \
@@ -112,28 +121,37 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
                                   format(match.group()))
 
         # Open file to check for blacklisted content
-        fo = open(patch_file, 'r')
-        lines = fo.readlines()
-
-        for line in lines:
-            if file_content_list.search(line) and not \
-                    file_content_project_list.search(line):
-                match = file_content_list.search(line)
-                logger.error('File contains violation: %s', patch_file)
-                logger.error('Flagged Content: %s', line.rstrip())
-                logger.error('Matched String: %s', match.group())
-                failure = True
-                with open(reports_dir + "contents_" + project + ".log",
-                          "a") as gate_report:
-                    gate_report.write('File contains violation: {0}\n'.
-                                      format(patch_file))
-                    gate_report.write('Flagged Content: {0}'.
-                                      format(line))
-                    gate_report.write('Matched String: {0}\n'.
-                                      format(match.group()))
-
-        # Run license check
-        licence_check(project, licence_ext, licence_ignore, patch_file)
+        try:
+            fo = open(patch_file, 'r')
+            lines = fo.readlines()
+            file_exists = True
+        except IOError:
+            file_exists = False
+
+        if file_exists and not patch_file.endswith(tuple(file_ignore)):
+            for line in lines:
+                for key, value in master_list.iteritems():
+                    regex = value['regex']
+                    desc = value['desc']
+                    if re.search(regex, line) and not re.search(
+                            ignore_list, line):
+                        logger.error('File contains violation: %s', patch_file)
+                        logger.error('Flagged Content: %s', line.rstrip())
+                        logger.error('Matched Regular Exp: %s', regex)
+                        logger.error('Rationale: %s', desc.rstrip())
+                        failure = True
+                        with open(reports_dir + "contents_" + project + ".log",
+                                  "a") as gate_report:
+                            gate_report.write('File contains violation: {0}\n'.
+                                              format(patch_file))
+                            gate_report.write('Flagged Content: {0}'.
+                                              format(line))
+                            gate_report.write('Matched Regular Exp: {0}\n'.
+                                              format(regex))
+                            gate_report.write('Rationale: {0}\n'.
+                                              format(desc.rstrip()))
+            # Run license check
+            licence_check(project, licence_ext, licence_ignore, patch_file)
 
 
 def licence_check(project, licence_ext,
@@ -147,9 +165,9 @@ def licence_check(project, licence_ext,
         # Note: Hardcoded use of 'copyright' & 'spdx' is the result
         # of a decision made at 2017 plugfest to limit searches to
         # just these two strings.
-        if re.search("copyright", content, re.IGNORECASE):
-            logger.info('Contains needed Licence string: %s', patch_file)
-        elif re.search("spdx", content, re.IGNORECASE):
+        patterns = ['copyright', 'spdx',
+                    'http://creativecommons.org/licenses/by/4.0']
+        if any(i in content.lower() for i in patterns):
             logger.info('Contains needed Licence string: %s', patch_file)
         else:
             logger.error('Licence header missing in file: %s', patch_file)