Introduce ignore list for content scans 75/41975/2
author lhinds <lhinds@redhat.com>
Thu, 14 Sep 2017 14:39:48 +0000 (15:39 +0100)
committer lhinds <lhinds@redhat.com>
Thu, 14 Sep 2017 14:43:38 +0000 (15:43 +0100)
Anteater was reporting failures on files that are documents and
therefore harmless (for example, an .rst file).

This patch introduces a file_ignore list in master_list.yaml

Change-Id: I87c73c80a36114a7df9e1da47d89ca14e3bf668a
Signed-off-by: lhinds <lhinds@redhat.com>
anteater/src/get_lists.py
anteater/src/patch_scan.py
anteater/src/project_scan.py
master_list.yaml

index fd80a6a..17de7cb 100644 (file)
@@ -87,7 +87,6 @@ class GetLists(object):
             binary_hash = (yl['binaries'][project][file_name])
             return binary_hash
         except KeyError:
-            logger.info('No checksum entries found for %s', file_name)
             binary_hash = 'null'
             return binary_hash
 
@@ -134,6 +133,13 @@ class GetLists(object):
 
         return master_list, project_list_re
 
+    def file_ignore(self):
+        try:
+            file_ignore = (yl['file_ignore'])
+        except KeyError:
+            logger.error('Key Error processing file_ignore list values')
+        return file_ignore
+
     def licence_extensions(self):
         try:
             licence_extensions = (yl['licence']['licence_ext'])
index 083639f..3b71f0a 100644 (file)
@@ -49,6 +49,9 @@ def prepare_patchset(project, patchset):
     # Get file content black list and project waivers
     master_list, project_list_re = lists.file_content_list(project)
 
+    # Get File Ignore Lists
+    file_ignore = lists.file_ignore()
+
     # Get Licence Lists
     licence_ext = lists.licence_extensions()
     licence_ignore = lists.licence_ignore()
@@ -67,7 +70,7 @@ def prepare_patchset(project, patchset):
         scan_patch(project, patch_file, binary_list,
                    file_audit_list, file_audit_project_list,
                    master_list, project_list_re, licence_ext,
-                   licence_ignore)
+                   file_ignore, licence_ignore)
 
     # Process each file in patch set using waivers generated above
     # Process final result
@@ -76,7 +79,7 @@ def prepare_patchset(project, patchset):
 
 def scan_patch(project, patch_file, binary_list, file_audit_list,
                file_audit_project_list, master_list,
-               project_list_re, licence_ext, licence_ignore):
+               project_list_re, licence_ext, file_ignore, licence_ignore):
     """ Scan actions for each commited file in patch set """
     global failure
     if is_binary(patch_file):
@@ -122,7 +125,7 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
         except IOError:
             file_exists = False
 
-        if file_exists:
+        if file_exists and not patch_file.endswith(tuple(file_ignore)):
             for line in lines:
                 for key, value in master_list.iteritems():
                     regex = value['regex']
index 411e47f..12e9a97 100644 (file)
@@ -49,13 +49,16 @@ def prepare_project(project, project_dir):
     # Get file content black list and project waivers
     master_list, project_list = lists.file_content_list(project)
 
+    # Get File Ignore Lists
+    file_ignore = lists.file_ignore()
+
     # Get Licence Lists
     licence_ext = lists.licence_extensions()
     licence_ignore = lists.licence_ignore()
 
     # Perform rudimentary scans
     scan_file(project_dir, project, binary_list,file_audit_list,
-              file_audit_project_list, master_list,
+              file_audit_project_list, master_list, file_ignore,
               project_list)
 
     # Perform licence header checks
@@ -64,7 +67,7 @@ def prepare_project(project, project_dir):
 
 
 def scan_file(project_dir, project, binary_list, file_audit_list,
-              file_audit_project_list, master_list,
+              file_audit_project_list, master_list, file_ignore,
               project_list):
     """Searches for banned strings and files that are listed """
     for root, dirs, files in os.walk(project_dir):
@@ -87,60 +90,64 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
                                 write('Matched String: {0}'.
                                       format(match.group()))
 
-            if not is_binary(full_path):
-                try:
-                    fo = open(full_path, 'r')
-                    lines = fo.readlines()
-                except IOError:
-                    logger.error('%s does not exist', full_path)
-
-                for line in lines:
-                    # Check for sensitive content in project files
-                    for key, value in master_list.iteritems():
-                        regex = value['regex']
-                        desc = value['desc']
-                        if re.search(regex, line) and not re.search(project_list, line):
-                            logger.error('File contains violation: %s', full_path)
-                            logger.error('Flagged Content: %s', line.rstrip())
-                            logger.error('Matched Regular Exp: %s', regex)
-                            logger.error('Rationale: %s', desc.rstrip())
-                            with open(reports_dir + "contents-" + project + ".log",
-                                      "a") \
-                                    as gate_report:
-                                        gate_report. \
-                                            write('File contains violation: {0}\n'.
-                                                  format(full_path))
-                                        gate_report. \
-                                            write('Flagged Content: {0}'.
-                                                  format(line))
-                                        gate_report. \
-                                            write('Matched Regular Exp: {0}'.
-                                                  format(regex))
-                                        gate_report. \
-                                            write('Rationale: {0}\n'.
-                                                  format(desc.rstrip()))
-            else:
-                # Check if Binary is whitelisted
-                hashlist = get_lists.GetLists()
-                binary_hash = hashlist.binary_hash(project, full_path)
-                if not binary_list.search(full_path):
-                    with open(full_path, 'rb') as afile:
-                        buf = afile.read()
-                        hasher.update(buf)
-                    if hasher.hexdigest() in binary_hash:
-                        logger.info('Found matching file hash for file: %s',
+                            # Check if Binary is whitelisted
+            hashlist = get_lists.GetLists()
+            binary_hash = hashlist.binary_hash(project, full_path)
+            if is_binary(full_path) and not binary_list.search(full_path):
+                with open(full_path, 'rb') as afile:
+                    buf = afile.read()
+                    hasher.update(buf)
+                if hasher.hexdigest() in binary_hash:
+                    logger.info('Found matching file hash for file: %s',
                                     full_path)
-                    else:
-                        logger.error('Non Whitelisted Binary file: %s',
-                                     full_path)
-                        logger.error('Please submit patch with this hash: %s',
-                                     hasher.hexdigest())
-                        with open(reports_dir + "binaries-" + project + ".log",
-                                  "a") \
-                                as gate_report:
+                else:
+                    logger.error('Non Whitelisted Binary file: %s',
+                                 full_path)
+                    logger.error('Please submit patch with this hash: %s',
+                                 hasher.hexdigest())
+                    with open(reports_dir + "binaries-" + project + ".log",
+                              "a") as gate_report:
                             gate_report.write('Non Whitelisted Binary: {0}\n'.
                                               format(full_path))
 
+            else:
+                if not items.endswith(tuple(file_ignore)):
+                    try:
+                        fo = open(full_path, 'r')
+                        lines = fo.readlines()
+                    except IOError:
+                        logger.error('%s does not exist', full_path)
+
+                    for line in lines:
+                        # Check for sensitive content in project files
+                        for key, value in master_list.iteritems():
+                            regex = value['regex']
+                            desc = value['desc']
+                            if re.search(regex, line) and not re.search(
+                                    project_list, line):
+                                logger.error('File contains violation: %s',
+                                             full_path)
+                                logger.error('Flagged Content: %s',
+                                             line.rstrip())
+                                logger.error('Matched Regular Exp: %s', regex)
+                                logger.error('Rationale: %s', desc.rstrip())
+                                with open(reports_dir + "contents-" + project
+                                                  + ".log", "a") \
+                                        as gate_report:
+                                    gate_report. \
+                                        write('File contains violation: {0}\n'.
+                                              format(full_path))
+                                    gate_report. \
+                                        write('Flagged Content: {0}'.
+                                              format(line))
+                                    gate_report. \
+                                        write('Matched Regular Exp: {0}'.
+                                              format(regex))
+                                    gate_report. \
+                                        write('Rationale: {0}\n'.
+                                              format(desc.rstrip()))
+
+
 
 def licence_root_check(project_dir, project):
     if os.path.isfile(project_dir + '/LICENSE'):
index 4ee1f4c..4c33835 100644 (file)
@@ -189,6 +189,10 @@ file_audits:
       regex: 0\.0\.0\.0
       desc: "Interface listening on all addresses - may break security zones"
 
+file_ignore:
+  - '.rst'
+  - '.md'
+
 licence:
   licence_ext:
     - '.java'