Enter sha256 binary sum in report files
[releng-anteater.git] / anteater / src / project_scan.py
index 5ac8b10..3a04a83 100644 (file)
@@ -30,6 +30,7 @@ config = six.moves.configparser.RawConfigParser()
 config.read('anteater.conf')
 reports_dir = config.get('config', 'reports_dir')
 master_list = config.get('config', 'master_list')
+ignore_list = config.get('config', 'master_list')
 ignore_dirs = ['.git']
 hasher = hashlib.sha256()
 
@@ -47,7 +48,10 @@ def prepare_project(project, project_dir):
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
 
     # Get file content black list and project waivers
-    file_content_list, project_content_list = lists.file_content_list(project)
+    master_list, ignore_list = lists.file_content_list(project)
+
+    # Get File Ignore Lists
+    file_ignore = lists.file_ignore()
 
     # Get Licence Lists
     licence_ext = lists.licence_extensions()
@@ -55,8 +59,8 @@ def prepare_project(project, project_dir):
 
     # Perform rudimentary scans
     scan_file(project_dir, project, binary_list,file_audit_list,
-              file_audit_project_list, file_content_list,
-              project_content_list)
+              file_audit_project_list, master_list, ignore_list,
+              file_ignore)
 
     # Perform licence header checks
     licence_check(licence_ext, licence_ignore, project, project_dir)
@@ -64,8 +68,8 @@ def prepare_project(project, project_dir):
 
 
 def scan_file(project_dir, project, binary_list, file_audit_list,
-              file_audit_project_list, file_content_list,
-              project_content_list):
+              file_audit_project_list, master_list, ignore_list,
+              file_ignore):
     """Searches for banned strings and files that are listed """
     for root, dirs, files in os.walk(project_dir):
         # Filter out ignored directories from list.
@@ -87,20 +91,54 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
                                 write('Matched String: {0}'.
                                       format(match.group()))
 
-            if not is_binary(full_path):
-                fo = open(full_path, 'r')
-                lines = fo.readlines()
-                for line in lines:
-                    # Check for sensitive content in project files
-                    if file_content_list.search(line) and not \
-                            project_content_list.search(line):
-                        match = file_content_list.search(line)
-                        logger.error('File contains violation: %s', full_path)
-                        logger.error('Flagged Content: %s', line.rstrip())
-                        logger.error('Matched String: %s', match.group())
-                        with open(reports_dir + "contents-" + project + ".log",
-                                  "a") \
-                                as gate_report:
+            # Check if Binary is whitelisted
+            hashlist = get_lists.GetLists()
+            binary_hash = hashlist.binary_hash(project, full_path)
+
+            if is_binary(full_path) and not binary_list.search(full_path):
+                with open(full_path, 'rb') as afile:
+                    buf = afile.read()
+                    hasher.update(buf)
+                if hasher.hexdigest() in binary_hash:
+                    logger.info('Found matching file hash for file: %s',
+                                    full_path)
+                else:
+                    logger.error('Non Whitelisted Binary file: %s',
+                                 full_path)
+                    logger.error('Please submit patch with this hash: %s',
+                                 hasher.hexdigest())
+                    with open(reports_dir + "binaries-" + project + ".log",
+                              "a") as gate_report:
+                            gate_report.write('Non Whitelisted Binary: {0}\n'.
+                                              format(full_path))
+                            gate_report.write(
+                                'Submit patch with the following hash: {0}\n'.
+                                format(hasher.hexdigest()))
+
+            else:
+                if not items.endswith(tuple(file_ignore)):
+                    try:
+                        fo = open(full_path, 'r')
+                        lines = fo.readlines()
+                    except IOError:
+                        logger.error('%s does not exist', full_path)
+
+                    for line in lines:
+                        # Check for sensitive content in project files
+                        for key, value in master_list.iteritems():
+                            regex = value['regex']
+                            desc = value['desc']
+                            if re.search(regex, line) and not re.search(
+                                    ignore_list, line):
+                                logger.error('File contains violation: %s',
+                                             full_path)
+                                logger.error('Flagged Content: %s',
+                                             line.rstrip())
+                                logger.error('Matched Regular Exp: %s', regex)
+                                logger.error('Rationale: %s', desc.rstrip())
+                                with open(reports_dir + "contents-" + project
+                                                  + ".log", "a") \
+                                        as gate_report:
                                     gate_report. \
                                         write('File contains violation: {0}\n'.
                                               format(full_path))
@@ -108,29 +146,12 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
                                         write('Flagged Content: {0}'.
                                               format(line))
                                     gate_report. \
-                                        write('Matched String: {0}\n'.
-                                              format(match.group()))
-            else:
-                # Check if Binary is whitelisted
-                hashlist = get_lists.GetLists()
-                binary_hash = hashlist.binary_hash(project, full_path)
-                if not binary_list.search(full_path):
-                    with open(full_path, 'rb') as afile:
-                        buf = afile.read()
-                        hasher.update(buf)
-                    if hasher.hexdigest() in binary_hash:
-                        logger.info('Found matching file hash for file: %s',
-                                    full_path)
-                    else:
-                        logger.error('Non Whitelisted Binary file: %s',
-                                     full_path)
-                        logger.error('Please submit patch with this hash: %s',
-                                     hasher.hexdigest())
-                        with open(reports_dir + "binaries-" + project + ".log",
-                                  "a") \
-                                as gate_report:
-                            gate_report.write('Non Whitelisted Binary: {0}\n'.
-                                              format(full_path))
+                                        write('Matched Regular Exp: {0}'.
+                                              format(regex))
+                                    gate_report. \
+                                        write('Rationale: {0}\n'.
+                                              format(desc.rstrip()))
+
 
 
 def licence_root_check(project_dir, project):