Implements Rationale Field 65/38765/8
author lhinds <lhinds@redhat.com>
Fri, 4 Aug 2017 13:46:24 +0000 (14:46 +0100)
committer lhinds <lhinds@redhat.com>
Fri, 4 Aug 2017 14:48:30 +0000 (15:48 +0100)
This change implements a descriptive field to explain the
rationale behind a block.

In time this will be part of a Sphinx auto-build documentation
system, which extracts the `desc` field and places it into a
template. The user will then be provided with a URL.

JIRA: RELENG-276

Change-Id: I82b4eb02fe502928726846cc08388a7a4f0ea2f6
Signed-off-by: lhinds <lhinds@redhat.com>
anteater/src/get_lists.py
anteater/src/patch_scan.py
anteater/src/project_scan.py
master_list.yaml

index 2419660..fd80a6a 100644 (file)
@@ -120,24 +120,19 @@ class GetLists(object):
         project_list = False
         self.load_project_exception_file(yl.get('project_exceptions'), project)
         try:
-            default_list = set((yl['file_audits']['file_contents']))
+            master_list = (yl['file_audits']['file_contents'])
+
         except KeyError:
             logger.error('Key Error processing file_contents list values')
+
         try:
             project_list = set((yl['file_audits'][project]['file_contents']))
+            project_list_re = re.compile("|".join(project_list),
+                                               flags=re.IGNORECASE)
         except KeyError:
             logger.info('No file_contents waivers found  for %s', project)
 
-        file_contents_re = re.compile("|".join(default_list),
-                                      flags=re.IGNORECASE)
-
-        if project_list:
-            file_contents_proj_re = re.compile("|".join(project_list),
-                                               flags=re.IGNORECASE)
-            return file_contents_re, file_contents_proj_re
-        else:
-            file_contents_proj_re = re.compile("")
-            return file_contents_re, file_contents_proj_re
+        return master_list, project_list_re
 
     def licence_extensions(self):
         try:
index 873c069..9f15556 100644 (file)
@@ -47,8 +47,7 @@ def prepare_patchset(project, patchset):
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
 
     # Get file content black list and project waivers
-    file_content_list, \
-        file_content_project_list = lists.file_content_list(project)
+    master_list, project_list_re = lists.file_content_list(project)
 
     # Get Licence Lists
     licence_ext = lists.licence_extensions()
@@ -67,7 +66,7 @@ def prepare_patchset(project, patchset):
         # Perform binary and file / content checks
         scan_patch(project, patch_file, binary_list,
                    file_audit_list, file_audit_project_list,
-                   file_content_list, file_content_project_list, licence_ext,
+                   master_list, project_list_re, licence_ext,
                    licence_ignore)
 
     # Process each file in patch set using waivers generated above
@@ -76,8 +75,8 @@ def prepare_patchset(project, patchset):
 
 
 def scan_patch(project, patch_file, binary_list, file_audit_list,
-               file_audit_project_list, file_content_list,
-               file_content_project_list, licence_ext, licence_ignore):
+               file_audit_project_list, master_list,
+               project_list_re, licence_ext, licence_ignore):
     """ Scan actions for each commited file in patch set """
     global failure
     if is_binary(patch_file):
@@ -116,16 +115,22 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
                                   format(match.group()))
 
         # Open file to check for blacklisted content
-        fo = open(patch_file, 'r')
-        lines = fo.readlines()
+        try:
+            fo = open(patch_file, 'r')
+            lines = fo.readlines()
+        except IOError:
+            logger.error('%s does not exist', patch_file)
+            sys.exit(1)
 
         for line in lines:
-            if file_content_list.search(line) and not \
-                    file_content_project_list.search(line):
-                match = file_content_list.search(line)
-                logger.error('File contains violation: %s', patch_file)
-                logger.error('Flagged Content: %s', line.rstrip())
-                logger.error('Matched String: %s', match.group())
+            for key, value in master_list.iteritems():
+                regex = value['regex']
+                desc = value['desc']
+                if re.search(regex, line) and not re.search(project_list_re, line):
+                    logger.error('File contains violation: %s', patch_file)
+                    logger.error('Flagged Content: %s', line.rstrip())
+                    logger.error('Matched Regular Exp: %s', regex)
+                    logger.error('Rationale: %s', desc.rstrip())
                 failure = True
                 with open(reports_dir + "contents_" + project + ".log",
                           "a") as gate_report:
@@ -133,9 +138,10 @@ def scan_patch(project, patch_file, binary_list, file_audit_list,
                                       format(patch_file))
                     gate_report.write('Flagged Content: {0}'.
                                       format(line))
-                    gate_report.write('Matched String: {0}\n'.
-                                      format(match.group()))
-
+                    gate_report.write('Matched Regular Exp: {0}'.
+                                      format(regex))
+                    gate_report.write('Rationale: {0}'.
+                                      format(desc.rstrip()))
         # Run license check
         licence_check(project, licence_ext, licence_ignore, patch_file)
 
index 5ac8b10..3c37621 100644 (file)
@@ -47,7 +47,7 @@ def prepare_project(project, project_dir):
     file_audit_list, file_audit_project_list = lists.file_audit_list(project)
 
     # Get file content black list and project waivers
-    file_content_list, project_content_list = lists.file_content_list(project)
+    master_list, project_list = lists.file_content_list(project)
 
     # Get Licence Lists
     licence_ext = lists.licence_extensions()
@@ -55,8 +55,8 @@ def prepare_project(project, project_dir):
 
     # Perform rudimentary scans
     scan_file(project_dir, project, binary_list,file_audit_list,
-              file_audit_project_list, file_content_list,
-              project_content_list)
+              file_audit_project_list, master_list,
+              project_list)
 
     # Perform licence header checks
     licence_check(licence_ext, licence_ignore, project, project_dir)
@@ -64,8 +64,8 @@ def prepare_project(project, project_dir):
 
 
 def scan_file(project_dir, project, binary_list, file_audit_list,
-              file_audit_project_list, file_content_list,
-              project_content_list):
+              file_audit_project_list, master_list,
+              project_list):
     """Searches for banned strings and files that are listed """
     for root, dirs, files in os.walk(project_dir):
         # Filter out ignored directories from list.
@@ -88,16 +88,22 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
                                       format(match.group()))
 
             if not is_binary(full_path):
-                fo = open(full_path, 'r')
-                lines = fo.readlines()
+                try:
+                    fo = open(full_path, 'r')
+                    lines = fo.readlines()
+                except IOError:
+                    logger.error('%s does not exist', full_path)
+
                 for line in lines:
                     # Check for sensitive content in project files
-                    if file_content_list.search(line) and not \
-                            project_content_list.search(line):
-                        match = file_content_list.search(line)
-                        logger.error('File contains violation: %s', full_path)
-                        logger.error('Flagged Content: %s', line.rstrip())
-                        logger.error('Matched String: %s', match.group())
+                    for key, value in master_list.iteritems():
+                        regex = value['regex']
+                        desc = value['desc']
+                        if re.search(regex, line) and not re.search(project_list, line):
+                            logger.error('File contains violation: %s', full_path)
+                            logger.error('Flagged Content: %s', line.rstrip())
+                            logger.error('Matched Regular Exp: %s', regex)
+                            logger.error('Rationale: %s', desc.rstrip())
                         with open(reports_dir + "contents-" + project + ".log",
                                   "a") \
                                 as gate_report:
@@ -108,8 +114,11 @@ def scan_file(project_dir, project, binary_list, file_audit_list,
                                         write('Flagged Content: {0}'.
                                               format(line))
                                     gate_report. \
-                                        write('Matched String: {0}\n'.
-                                              format(match.group()))
+                                        write('Matched Regular Exp: {0}'.
+                                              format(regex))
+                                    gate_report. \
+                                        write('Rationale: {0}\n'.
+                                              format(desc.rstrip()))
             else:
                 # Check if Binary is whitelisted
                 hashlist = get_lists.GetLists()
index c40e138..178dde4 100644 (file)
@@ -1,7 +1,4 @@
 ---
-# When adding projects all `arrays: []` sections must have
-# a value, Use 'nullvalue' if no waivers are available.
-#
 # This file uses standard regular expression syntax,  however be mindful
 # of escaping YAML delimiters too (such as `:`) using double quotes "".
 
@@ -58,38 +55,147 @@ file_audits:
     - aws_secret_access_key
 
   file_contents:
-    - -----BEGIN\sRSA\sPRIVATE\sKEY----
-    - (password|passwd)(.*:|.*=.*)
-    - curl
-    - git.*clone
-    - dual_ec_drbg
-    - base64_decode
-    - gost
-    - md[245]
-    - panama
-    - private_key
-    - rc4
-    - ripemd
-    - secret
-    - sha0
-    - snefru
-    - ssh_key
-    - sslv[12]
-    - streebog
-    - tlsv1
-    - wget
-    - run_as_root.*=.*True
-    - exec\s*(\"|\().+(\"|\))
-    - \beval\b
-    - app\.run\s*\(.*debug.*=.*True.*\)
-    - autoescape.*=.*False
-    - safestring\.mark_safe.*\(.*\)
-    - shell.*=.*True
-    - \/tmp\/
-    - \yaml\.load
-    - telnet
-    - ftp
-    - finger
+    private_key:
+      regex: -----BEGIN\sRSA\sPRIVATE\sKEY----
+      desc: "This looks like it could be a private key"
+
+    password:
+      regex: (password|passwd)(.*:|.*=.*)
+      desc: "Possible hardcoded password"
+
+    curl:
+      regex: \bcurl\b
+      desc: "Curl can be used for retrieving objects from untrusted sources"
+
+    clone:
+      regex: git.*clone
+      desc: "clone blocked as using an non approved external source"
+
+    dual_ec_drbg:
+      regex: dual_ec_drbg
+      desc: "Insecure cryptographic algorithm"
+
+    base64_decode:
+      regex: base64_decode
+      desc: "Insecure cryptographic algorithm"
+
+    gost:
+      regex: gost
+      desc: "Insecure cryptographic algorithm"
+
+    md245:
+      regex: md[245]
+      desc: "Insecure hashing algorithm"
+
+    panama:
+      regex: panama
+      desc: "Insecure cryptographic algorithm"
+
+    private_key2:
+      regex: private_key
+      desc: "This looks like it could be a private key"
+
+    rc4:
+      regex: rc4
+      desc: "Rivest Cipher 4 is an insecure stream cipher"
+
+    ripemd:
+      regex: ripemd
+      desc: |
+        "RACE Integrity Primitives Evaluation Message Digest
+        is an insecure hashing algorithm"
+
+    secret:
+      regex: secret
+      desc: "Possible leak of sensitive information"
+
+    sha:
+      regex: sha[01]
+      desc: "Insecure hashing algorithm"
+
+    snefru:
+      regex: snefru
+      desc: "Insecure hashing algorithm"
+
+    ssh_key:
+      regex: ssh_key
+      desc: "Possible leak of private SSH key"
+
+    sslv:
+      regex: sslv[12]
+      desc: "Insecure SSL Version"
+
+    streebog:
+      regex: sslv[12]
+      desc: "Insecure cryptographic hashing algorithm"
+
+    tlsv1:
+      regex: tlsv1
+      desc: "Insecure TLS Version"
+
+    wget:
+      regex: wget
+      desc: "WGET is blocked to unknown / untrusted destinations"
+
+    run_as_root:
+      regex: run_as_root.*=.*True
+      desc: "Its better to use sudo or a rootwrapper"
+
+    exec:
+      regex: \sexec\s*(\"|\().+(\"|\))
+      desc: "Exec can be dangerous when used with arbitrary, untrusted code."
+
+    eval:
+      regex: \beval\b
+      desc: "Eval can be dangerous when used with arbitrary, untrusted code."
+
+    apprun:
+      regex: app\.run\s*\(.*debug.*=.*True.*\)
+      desc: |
+        "Running flask in debug mode can give away sensitive data on a
+        systems configuration"
+
+    autoescape:
+      regex: autoescape.*=.*False
+      desc: |
+        "Without escaping HTML input an application becomes
+        vulnerable to Cross Site Scripting (XSS) attacks."
+
+    safestring:
+      regex: safestring\.mark_safe.*\(.*\)
+      desc: |
+        "Without escaping HTML input an application becomes
+        vulnerable to Cross Site Scripting (XSS) attacks."
+
+    shelltrue:
+      regex: shell.*=.*True
+      desc: |
+        "Shell=True can lead to dangerous shell escapes,
+        expecially when the input can be crafted by untrusted external input"
+
+    tmp:
+      regex: \/tmp\/
+      desc: |
+        "Use of tmp directories can be dangerous. Its world writable and
+        accessable, and can be easily guessed by attackers"
+
+    yamlload:
+      regex: \yaml\.load
+      desc: |
+        "Avoid dangerous file parsing and object serialization libraries,
+        use instead `yaml.safe_load`"
+
+    telnet:
+      regex: telnet
+      desc: "Avoid coms applications that transmit credentials in clear text"
+
+    ftp:
+      regex: \bftp\b
+      desc: "Avoid coms applications that transmit credentials in clear text"
+
+    finger:
+      regex: \bfinger\b
+      desc: "Avoid coms applications that transmit credentials in clear text"
 
 licence:
   licence_ext: