Only publish testcases that appear in testcases.py, to save time
[releng.git] / utils / test / scripts / mongo_to_elasticsearch.py
index 6799574..3af7c0f 100644 (file)
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
+
 import datetime
 import json
-import logging
 import os
 import subprocess
 import traceback
@@ -11,219 +11,236 @@ import uuid
 import argparse
 
 import conf_utils
-import shared_utils
+import logger_utils
 import mongo2elastic_format
+import shared_utils
 
-logger = logging.getLogger('mongo_to_elasticsearch')
-logger.setLevel(logging.DEBUG)
-file_handler = logging.FileHandler('/var/log/{}.log'.format('mongo_to_elasticsearch'))
-file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
-logger.addHandler(file_handler)
-
-
-def _fix_date(date_string):
-    if isinstance(date_string, dict):
-        return date_string['$date']
-    else:
-        return date_string[:-3].replace(' ', 'T') + 'Z'
-
-
-def verify_mongo_entry(testcase):
-    """
-    Mandatory fields:
-        installer
-        pod_name
-        version
-        case_name
-        date
-        project
-        details
-
-        these fields must be present and must NOT be None
-
-    Optional fields:
-        description
-
-        these fields will be preserved if the are NOT None
-    """
-    mandatory_fields = ['installer',
-                        'pod_name',
-                        'version',
-                        'case_name',
-                        'project_name',
-                        'details']
-    mandatory_fields_to_modify = {'start_date': _fix_date}
-    fields_to_swap_or_add = {'scenario': 'version'}
-    if '_id' in testcase:
-        mongo_id = testcase['_id']
-    else:
-        mongo_id = None
-    optional_fields = ['description']
-    for key, value in testcase.items():
-        if key in mandatory_fields:
-            if value is None:
-                # empty mandatory field, invalid input
-                logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing value"
-                            " for mandatory field '{}'".format(mongo_id, key))
-                return False
-            else:
-                mandatory_fields.remove(key)
-        elif key in mandatory_fields_to_modify:
-            if value is None:
-                # empty mandatory field, invalid input
-                logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing value"
-                            " for mandatory field '{}'".format(mongo_id, key))
-                return False
-            else:
-                testcase[key] = mandatory_fields_to_modify[key](value)
-                del mandatory_fields_to_modify[key]
-        elif key in fields_to_swap_or_add:
-            if value is None:
-                swapped_key = fields_to_swap_or_add[key]
-                swapped_value = testcase[swapped_key]
-                logger.info("Swapping field '{}' with value None for '{}' with value '{}'.".format(key, swapped_key, swapped_value))
-                testcase[key] = swapped_value
-                del fields_to_swap_or_add[key]
-            else:
-                del fields_to_swap_or_add[key]
-        elif key in optional_fields:
-            if value is None:
-                # empty optional field, remove
-                del testcase[key]
-            optional_fields.remove(key)
-        else:
-            # unknown field
-            del testcase[key]
-
-    if len(mandatory_fields) > 0:
-        # some mandatory fields are missing
-        logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing"
-                    " mandatory field(s) '{}'".format(mongo_id, mandatory_fields))
-        return False
-    elif len(mandatory_fields_to_modify) > 0:
-        # some mandatory fields are missing
-        logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing"
-                    " mandatory field(s) '{}'".format(mongo_id, mandatory_fields_to_modify.keys()))
-        return False
-    else:
-        if len(fields_to_swap_or_add) > 0:
-            for key, swap_key in fields_to_swap_or_add.iteritems():
-                testcase[key] = testcase[swap_key]
-
-        return True
-
-
-def modify_mongo_entry(testcase):
-    # 1. verify and identify the testcase
-    # 2. if modification is implemented, then use that
-    # 3. if not, try to use default
-    # 4. if 2 or 3 is successful, return True, otherwise return False
-    if verify_mongo_entry(testcase):
-        project = testcase['project_name']
-        case_name = testcase['case_name']
-        fmt = conf_utils.get_format(project, case_name)
-        if fmt:
-            try:
-                logger.info("Processing %s/%s using format %s" % (project, case_name, fmt))
-                return vars(mongo2elastic_format)[fmt](testcase)
-            except Exception:
-                logger.error("Fail in modify testcase[%s]\nerror message: %s" % (testcase, traceback.format_exc()))
-    else:
-        return False
-
-
-def publish_mongo_data(output_destination):
-    tmp_filename = 'mongo-{}.log'.format(uuid.uuid4())
-    try:
-        subprocess.check_call(['mongoexport',
-                               '--db', 'test_results_collection',
-                               '-c', 'results',
-                               '--out', tmp_filename])
-        with open(tmp_filename) as fobj:
-            for mongo_json_line in fobj:
-                test_result = json.loads(mongo_json_line)
-                if modify_mongo_entry(test_result):
-                    status, data = shared_utils.publish_json(test_result, es_creds, output_destination)
-                    if status > 300:
-                        project = test_result['project_name']
-                        case_name = test_result['case_name']
-                        logger.info('project {} case {} publish failed, due to [{}]'
-                                    .format(project, case_name, json.loads(data)['error']['reason']))
-    finally:
-        if os.path.exists(tmp_filename):
-            os.remove(tmp_filename)
-
-
-def get_mongo_data(days):
-    past_time = datetime.datetime.today() - datetime.timedelta(days=days)
-    mongo_json_lines = subprocess.check_output(['mongoexport', '--db', 'test_results_collection', '-c', 'results',
-                                                '--query', '{{"start_date":{{$gt:"{}"}}}}'
-                                               .format(past_time)]).splitlines()
-
-    mongo_data = []
-    for mongo_json_line in mongo_json_lines:
-        test_result = json.loads(mongo_json_line)
-        if modify_mongo_entry(test_result):
-            # if the modification could be applied, append the modified result
-            mongo_data.append(test_result)
-    return mongo_data
-
-
-def publish_difference(mongo_data, elastic_data, output_destination, es_creds):
-    for elastic_entry in elastic_data:
-        if elastic_entry in mongo_data:
-            mongo_data.remove(elastic_entry)
-
-    logger.info('number of parsed test results: {}'.format(len(mongo_data)))
-
-    for parsed_test_result in mongo_data:
-        shared_utils.publish_json(parsed_test_result, es_creds, output_destination)
# Shared file logger for this script (writes under the Kibana dashboard log dir).
logger = logger_utils.KibanaDashboardLogger('mongo2elastic').get

parser = argparse.ArgumentParser(description='Modify and filter mongo json data for elasticsearch')
parser.add_argument('-od', '--output-destination',
                    default='elasticsearch',
                    choices=('elasticsearch', 'stdout'),
                    help='defaults to elasticsearch')

parser.add_argument('-ml', '--merge-latest', default=0, type=int, metavar='N',
                    help='get entries old at most N days from mongodb and'
                         ' parse those that are not already in elasticsearch.'
                         ' If not present, will get everything from mongodb, which is the default')

parser.add_argument('-e', '--elasticsearch-url', default='http://localhost:9200',
                    help='the url of elasticsearch, defaults to http://localhost:9200')

parser.add_argument('-u', '--elasticsearch-username', default=None,
                    help='The username with password for elasticsearch in format username:password')

# Parsed at import time: this module is a command-line script.
args = parser.parse_args()

# Per-run scratch file for the mongoexport dump; a uuid avoids collisions
# between concurrent runs. Removed by DocumentsPublisher._remove().
tmp_docs_file = './mongo-{}.json'.format(uuid.uuid4())
 
-    parser.add_argument('-e', '--elasticsearch-url', default='http://localhost:9200',
-                        help='the url of elasticsearch, defaults to http://localhost:9200')
 
-    parser.add_argument('-u', '--elasticsearch-username', default=None,
-                        help='The username with password for elasticsearch in format username:password')
+class DocumentPublisher:
 
-    args = parser.parse_args()
+    def __init__(self, doc, fmt, exist_docs, creds, to):
+        self.doc = doc
+        self.fmt = fmt
+        self.creds = creds
+        self.exist_docs = exist_docs
+        self.to = to
+        self.is_formatted = True
+
+    def format(self):
+        try:
+            if self._verify_document() and self.fmt:
+                self.is_formatted = vars(mongo2elastic_format)[self.fmt](self.doc)
+            else:
+                self.is_formatted = False
+        except Exception:
+            logger.error("Fail in format testcase[%s]\nerror message: %s" %
+                         (self.doc, traceback.format_exc()))
+            self.is_formatted = False
+        finally:
+            return self
+
+    def publish(self):
+        if self.is_formatted and self.doc not in self.exist_docs:
+            self._publish()
+
+    def _publish(self):
+        status, data = shared_utils.publish_json(self.doc, self.creds, self.to)
+        if status > 300:
+            logger.error('Publish record[{}] failed, due to [{}]'
+                         .format(self.doc, json.loads(data)['error']['reason']))
+
+    def _fix_date(self, date_string):
+        if isinstance(date_string, dict):
+            return date_string['$date']
+        else:
+            return date_string[:-3].replace(' ', 'T') + 'Z'
+
+    def _verify_document(self):
+        """
+        Mandatory fields:
+            installer
+            pod_name
+            version
+            case_name
+            date
+            project
+            details
+
+            these fields must be present and must NOT be None
+
+        Optional fields:
+            description
+
+            these fields will be preserved if the are NOT None
+        """
+        mandatory_fields = ['installer',
+                            'pod_name',
+                            'version',
+                            'case_name',
+                            'project_name',
+                            'details']
+        mandatory_fields_to_modify = {'start_date': self._fix_date}
+        fields_to_swap_or_add = {'scenario': 'version'}
+        if '_id' in self.doc:
+            mongo_id = self.doc['_id']
+        else:
+            mongo_id = None
+        optional_fields = ['description']
+        for key, value in self.doc.items():
+            if key in mandatory_fields:
+                if value is None:
+                    # empty mandatory field, invalid input
+                    logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing value"
+                                " for mandatory field '{}'".format(mongo_id, key))
+                    return False
+                else:
+                    mandatory_fields.remove(key)
+            elif key in mandatory_fields_to_modify:
+                if value is None:
+                    # empty mandatory field, invalid input
+                    logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing value"
+                                " for mandatory field '{}'".format(mongo_id, key))
+                    return False
+                else:
+                    self.doc[key] = mandatory_fields_to_modify[key](value)
+                    del mandatory_fields_to_modify[key]
+            elif key in fields_to_swap_or_add:
+                if value is None:
+                    swapped_key = fields_to_swap_or_add[key]
+                    swapped_value = self.doc[swapped_key]
+                    logger.info("Swapping field '{}' with value None for '{}' with value '{}'.".format(key, swapped_key,
+                                                                                                       swapped_value))
+                    self.doc[key] = swapped_value
+                    del fields_to_swap_or_add[key]
+                else:
+                    del fields_to_swap_or_add[key]
+            elif key in optional_fields:
+                if value is None:
+                    # empty optional field, remove
+                    del self.doc[key]
+                optional_fields.remove(key)
+            else:
+                # unknown field
+                del self.doc[key]
+
+        if len(mandatory_fields) > 0:
+            # some mandatory fields are missing
+            logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing"
+                        " mandatory field(s) '{}'".format(mongo_id, mandatory_fields))
+            return False
+        elif len(mandatory_fields_to_modify) > 0:
+            # some mandatory fields are missing
+            logger.info("Skipping testcase with mongo _id '{}' because the testcase was missing"
+                        " mandatory field(s) '{}'".format(mongo_id, mandatory_fields_to_modify.keys()))
+            return False
+        else:
+            if len(fields_to_swap_or_add) > 0:
+                for key, swap_key in fields_to_swap_or_add.iteritems():
+                    self.doc[key] = self.doc[swap_key]
+
+            return True
+
+
class DocumentsPublisher:
    """Export one testcase's documents from mongodb and publish those not
    already present in elasticsearch."""

    def __init__(self, project, case, fmt, days, elastic_url, creds, to):
        self.project = project        # project name, e.g. 'functest'
        self.case = case              # testcase name
        self.fmt = fmt                # formatter name for DocumentPublisher
        self.days = days              # look-back window; 0 means everything
        self.elastic_url = elastic_url
        self.creds = creds
        self.to = to
        self.existed_docs = []        # filled by get_existed_docs()

    def export(self):
        """Dump matching documents from mongodb into ``tmp_docs_file``.

        Exits the process on mongoexport failure (after cleanup).
        """
        # BUG FIX: use self.days — the original read the module-global 'days',
        # leaving the constructor argument unused.
        if self.days > 0:
            past_time = datetime.datetime.today() - datetime.timedelta(days=self.days)
            query = '''{{
                          "project_name": "{}",
                          "case_name": "{}",
                          "start_date": {{"$gt" : "{}"}}
                        }}'''.format(self.project, self.case, past_time)
        else:
            query = '''{{
                           "project_name": "{}",
                           "case_name": "{}"
                        }}'''.format(self.project, self.case)
        cmd = ['mongoexport',
               '--db', 'test_results_collection',
               '--collection', 'results',
               '--query', query,
               '--out', tmp_docs_file]
        try:
            subprocess.check_call(cmd)
            return self
        except Exception as err:  # 'except Exception, err' was py2-only syntax
            logger.error("export mongodb failed: %s" % err)
            self._remove()
            # equivalent to exit(-1) but does not depend on the 'site' builtin
            raise SystemExit(-1)

    def get_existed_docs(self):
        """Fetch the documents already indexed within the look-back window."""
        # BUG FIX: self.days, not the module-global 'days'.
        self.existed_docs = shared_utils.get_elastic_docs_by_days(self.elastic_url,
                                                                  self.creds,
                                                                  self.days)
        return self

    def publish(self):
        """Format and publish each exported document, then drop the dump file."""
        try:
            with open(tmp_docs_file) as fdocs:
                for doc_line in fdocs:
                    DocumentPublisher(json.loads(doc_line),
                                      self.fmt,
                                      self.existed_docs,
                                      self.creds,
                                      self.to).format().publish()
        finally:
            # The 'with' block closes fdocs; the original's explicit
            # fdocs.close() here raised NameError whenever open() failed.
            self._remove()

    def _remove(self):
        # Best-effort cleanup of the mongoexport scratch file.
        if os.path.exists(tmp_docs_file):
            os.remove(tmp_docs_file)
+
+
if __name__ == '__main__':
    # All results land under this fixed index/type on the target cluster.
    base_elastic_url = urlparse.urljoin(args.elasticsearch_url, '/test_results/mongo2elastic')
    to = args.output_destination
    days = args.merge_latest
    es_creds = args.elasticsearch_username

    # '-od elasticsearch' is a symbolic choice; resolve it to the real url.
    if to == 'elasticsearch':
        to = base_elastic_url

    # Only testcases declared in the yaml config are published — each entry
    # names the case and the formatter used to reshape its documents.
    for project, case_dicts in conf_utils.testcases_yaml.items():
        for case_dict in case_dicts:
            case = case_dict.get('name')
            fmt = conf_utils.compose_format(case_dict.get('format'))
            DocumentsPublisher(project,
                               case,
                               fmt,
                               days,
                               base_elastic_url,
                               es_creds,
                               to).export().get_existed_docs().publish()