From d1db65c86809a0ea9adae1423237cdc60eb026fb Mon Sep 17 00:00:00 2001 From: lhinds Date: Fri, 4 Aug 2017 14:46:24 +0100 Subject: [PATCH] Implements Rationale Field This change implements a descriptive field to explain the rationale behind a block. In time this will be part of a Sphinx auto build documentation system, which extracts the `desc` field and places it into a template. The user will then be provided with a URL. JIRA: RELENG-276 Change-Id: I82b4eb02fe502928726846cc08388a7a4f0ea2f6 Signed-off-by: lhinds --- anteater/src/get_lists.py | 17 ++--- anteater/src/patch_scan.py | 38 ++++++---- anteater/src/project_scan.py | 39 ++++++---- master_list.yaml | 176 ++++++++++++++++++++++++++++++++++--------- 4 files changed, 193 insertions(+), 77 deletions(-) diff --git a/anteater/src/get_lists.py b/anteater/src/get_lists.py index 2419660..fd80a6a 100644 --- a/anteater/src/get_lists.py +++ b/anteater/src/get_lists.py @@ -120,24 +120,19 @@ class GetLists(object): project_list = False self.load_project_exception_file(yl.get('project_exceptions'), project) try: - default_list = set((yl['file_audits']['file_contents'])) + master_list = (yl['file_audits']['file_contents']) + except KeyError: logger.error('Key Error processing file_contents list values') + try: project_list = set((yl['file_audits'][project]['file_contents'])) + project_list_re = re.compile("|".join(project_list), + flags=re.IGNORECASE) except KeyError: logger.info('No file_contents waivers found for %s', project) - file_contents_re = re.compile("|".join(default_list), - flags=re.IGNORECASE) - - if project_list: - file_contents_proj_re = re.compile("|".join(project_list), - flags=re.IGNORECASE) - return file_contents_re, file_contents_proj_re - else: - file_contents_proj_re = re.compile("") - return file_contents_re, file_contents_proj_re + return master_list, project_list_re def licence_extensions(self): try: diff --git a/anteater/src/patch_scan.py b/anteater/src/patch_scan.py index 873c069..9f15556 100644 
--- a/anteater/src/patch_scan.py +++ b/anteater/src/patch_scan.py @@ -47,8 +47,7 @@ def prepare_patchset(project, patchset): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, \ - file_content_project_list = lists.file_content_list(project) + master_list, project_list_re = lists.file_content_list(project) # Get Licence Lists licence_ext = lists.licence_extensions() @@ -67,7 +66,7 @@ def prepare_patchset(project, patchset): # Perform binary and file / content checks scan_patch(project, patch_file, binary_list, file_audit_list, file_audit_project_list, - file_content_list, file_content_project_list, licence_ext, + master_list, project_list_re, licence_ext, licence_ignore) # Process each file in patch set using waivers generated above @@ -76,8 +75,8 @@ def prepare_patchset(project, patchset): def scan_patch(project, patch_file, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - file_content_project_list, licence_ext, licence_ignore): + file_audit_project_list, master_list, + project_list_re, licence_ext, licence_ignore): """ Scan actions for each commited file in patch set """ global failure if is_binary(patch_file): @@ -116,16 +115,22 @@ def scan_patch(project, patch_file, binary_list, file_audit_list, format(match.group())) # Open file to check for blacklisted content - fo = open(patch_file, 'r') - lines = fo.readlines() + try: + fo = open(patch_file, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', patch_file) + sys.exit(1) for line in lines: - if file_content_list.search(line) and not \ - file_content_project_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: %s', patch_file) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched String: %s', match.group()) + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = 
value['desc'] + if re.search(regex, line) and not re.search(project_list_re, line): + logger.error('File contains violation: %s', patch_file) + logger.error('Flagged Content: %s', line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) failure = True with open(reports_dir + "contents_" + project + ".log", "a") as gate_report: @@ -133,9 +138,10 @@ def scan_patch(project, patch_file, binary_list, file_audit_list, format(patch_file)) gate_report.write('Flagged Content: {0}'. format(line)) - gate_report.write('Matched String: {0}\n'. - format(match.group())) - + gate_report.write('Matched Regular Exp: {0}'. + format(regex)) + gate_report.write('Rationale: {0}'. + format(desc.rstrip())) # Run license check licence_check(project, licence_ext, licence_ignore, patch_file) diff --git a/anteater/src/project_scan.py b/anteater/src/project_scan.py index 5ac8b10..3c37621 100644 --- a/anteater/src/project_scan.py +++ b/anteater/src/project_scan.py @@ -47,7 +47,7 @@ def prepare_project(project, project_dir): file_audit_list, file_audit_project_list = lists.file_audit_list(project) # Get file content black list and project waivers - file_content_list, project_content_list = lists.file_content_list(project) + master_list, project_list = lists.file_content_list(project) # Get Licence Lists licence_ext = lists.licence_extensions() @@ -55,8 +55,8 @@ def prepare_project(project, project_dir): # Perform rudimentary scans scan_file(project_dir, project, binary_list,file_audit_list, - file_audit_project_list, file_content_list, - project_content_list) + file_audit_project_list, master_list, + project_list) # Perform licence header checks licence_check(licence_ext, licence_ignore, project, project_dir) @@ -64,8 +64,8 @@ def prepare_project(project, project_dir): def scan_file(project_dir, project, binary_list, file_audit_list, - file_audit_project_list, file_content_list, - project_content_list): + file_audit_project_list, master_list, 
+ project_list): """Searches for banned strings and files that are listed """ for root, dirs, files in os.walk(project_dir): # Filter out ignored directories from list. @@ -88,16 +88,22 @@ def scan_file(project_dir, project, binary_list, file_audit_list, format(match.group())) if not is_binary(full_path): - fo = open(full_path, 'r') - lines = fo.readlines() + try: + fo = open(full_path, 'r') + lines = fo.readlines() + except IOError: + logger.error('%s does not exist', full_path) + for line in lines: # Check for sensitive content in project files - if file_content_list.search(line) and not \ - project_content_list.search(line): - match = file_content_list.search(line) - logger.error('File contains violation: %s', full_path) - logger.error('Flagged Content: %s', line.rstrip()) - logger.error('Matched String: %s', match.group()) + for key, value in master_list.iteritems(): + regex = value['regex'] + desc = value['desc'] + if re.search(regex, line) and not re.search(project_list, line): + logger.error('File contains violation: %s', full_path) + logger.error('Flagged Content: %s', line.rstrip()) + logger.error('Matched Regular Exp: %s', regex) + logger.error('Rationale: %s', desc.rstrip()) with open(reports_dir + "contents-" + project + ".log", "a") \ as gate_report: @@ -108,8 +114,11 @@ def scan_file(project_dir, project, binary_list, file_audit_list, write('Flagged Content: {0}'. format(line)) gate_report. \ - write('Matched String: {0}\n'. - format(match.group())) + write('Matched Regular Exp: {0}'. + format(regex)) + gate_report. \ + write('Rationale: {0}\n'. + format(desc.rstrip())) else: # Check if Binary is whitelisted hashlist = get_lists.GetLists() diff --git a/master_list.yaml b/master_list.yaml index c40e138..178dde4 100644 --- a/master_list.yaml +++ b/master_list.yaml @@ -1,7 +1,4 @@ --- -# When adding projects all `arrays: []` sections must have -# a value, Use 'nullvalue' if no waivers are available. 
-# # This file uses standard regular expression syntax, however be mindful # of escaping YAML delimiters too (such as `:`) using double quotes "". @@ -58,38 +55,147 @@ file_audits: - aws_secret_access_key file_contents: - - -----BEGIN\sRSA\sPRIVATE\sKEY---- - - (password|passwd)(.*:|.*=.*) - - curl - - git.*clone - - dual_ec_drbg - - base64_decode - - gost - - md[245] - - panama - - private_key - - rc4 - - ripemd - - secret - - sha0 - - snefru - - ssh_key - - sslv[12] - - streebog - - tlsv1 - - wget - - run_as_root.*=.*True - - exec\s*(\"|\().+(\"|\)) - - \beval\b - - app\.run\s*\(.*debug.*=.*True.*\) - - autoescape.*=.*False - - safestring\.mark_safe.*\(.*\) - - shell.*=.*True - - \/tmp\/ - - \yaml\.load - - telnet - - ftp - - finger + private_key: + regex: -----BEGIN\sRSA\sPRIVATE\sKEY---- + desc: "This looks like it could be a private key" + + password: + regex: (password|passwd)(.*:|.*=.*) + desc: "Possible hardcoded password" + + curl: + regex: \bcurl\b + desc: "Curl can be used for retrieving objects from untrusted sources" + + clone: + regex: git.*clone + desc: "clone blocked as using an non approved external source" + + dual_ec_drbg: + regex: dual_ec_drbg + desc: "Insecure cryptographic algorithm" + + base64_decode: + regex: base64_decode + desc: "Insecure cryptographic algorithm" + + gost: + regex: gost + desc: "Insecure cryptographic algorithm" + + md245: + regex: md[245] + desc: "Insecure hashing algorithm" + + panama: + regex: panama + desc: "Insecure cryptographic algorithm" + + private_key2: + regex: private_key + desc: "This looks like it could be a private key" + + rc4: + regex: rc4 + desc: "Rivest Cipher 4 is an insecure stream cipher" + + ripemd: + regex: ripemd + desc: | + "RACE Integrity Primitives Evaluation Message Digest + is an insecure hashing algorithm" + + secret: + regex: secret + desc: "Possible leak of sensitive information" + + sha: + regex: sha[01] + desc: "Insecure hashing algorithm" + + snefru: + regex: snefru + desc: "Insecure 
hashing algorithm" + + ssh_key: + regex: ssh_key + desc: "Possible leak of private SSH key" + + sslv: + regex: sslv[12] + desc: "Insecure SSL Version" + + streebog: + regex: sslv[12] + desc: "Insecure cryptographic hashing algorithm" + + tlsv1: + regex: tlsv1 + desc: "Insecure TLS Version" + + wget: + regex: wget + desc: "WGET is blocked to unknown / untrusted destinations" + + run_as_root: + regex: run_as_root.*=.*True + desc: "Its better to use sudo or a rootwrapper" + + exec: + regex: \sexec\s*(\"|\().+(\"|\)) + desc: "Exec can be dangerous when used with arbitrary, untrusted code." + + eval: + regex: \beval\b + desc: "Eval can be dangerous when used with arbitrary, untrusted code." + + apprun: + regex: app\.run\s*\(.*debug.*=.*True.*\) + desc: | + "Running flask in debug mode can give away sensitive data on a + systems configuration" + + autoescape: + regex: autoescape.*=.*False + desc: | + "Without escaping HTML input an application becomes + vulnerable to Cross Site Scripting (XSS) attacks." + + safestring: + regex: safestring\.mark_safe.*\(.*\) + desc: | + "Without escaping HTML input an application becomes + vulnerable to Cross Site Scripting (XSS) attacks." + + shelltrue: + regex: shell.*=.*True + desc: | + "Shell=True can lead to dangerous shell escapes, + expecially when the input can be crafted by untrusted external input" + + tmp: + regex: \/tmp\/ + desc: | + "Use of tmp directories can be dangerous. Its world writable and + accessable, and can be easily guessed by attackers" + + yamlload: + regex: \yaml\.load + desc: | + "Avoid dangerous file parsing and object serialization libraries, + use instead `yaml.safe_load`" + + telnet: + regex: telnet + desc: "Avoid coms applications that transmit credentials in clear text" + + ftp: + regex: \bftp\b + desc: "Avoid coms applications that transmit credentials in clear text" + + finger: + regex: \bfinger\b + desc: "Avoid coms applications that transmit credentials in clear text" licence: licence_ext: -- 2.16.6