Implement logfile collector and grep parser 51/28051/10
author Yujun Zhang <zhang.yujunz@zte.com.cn>
Sun, 5 Feb 2017 04:24:30 +0000 (12:24 +0800)
committer Yujun Zhang <zhang.yujunz@zte.com.cn>
Tue, 14 Feb 2017 23:03:25 +0000 (07:03 +0800)
JIRA: QTIP-207
JIRA: QTIP-208
Change-Id: Icc14d3097fb305e59df716636ef87504490c9d1b
Signed-off-by: Yujun Zhang <zhang.yujunz@zte.com.cn>
22 files changed:
qtip/base/__init__.py
qtip/base/constant.py
qtip/cli/commands/cmd_metric.py
qtip/collector/__init__.py
qtip/collector/logfile.py
qtip/collector/parser/__init__.py [new file with mode: 0644]
qtip/collector/parser/grep.py [moved from qtip/collector/base.py with 50% similarity]
qtip/loader/file.py
qtip/loader/plan.py
qtip/loader/yaml_file.py
qtip/runner/__init__.py
test-requirements.txt
tests/conftest.py
tests/data/benchmarks/QPI/fake_qpi.yaml [moved from tests/data/benchmarks/QPI/fake-qpi.yaml with 100% similarity]
tests/data/benchmarks/plan/doctor.yaml
tests/data/benchmarks/plan/fake-plan.yaml [deleted file]
tests/data/fake.log [new file with mode: 0644]
tests/unit/collector/__init__.py [new file with mode: 0644]
tests/unit/collector/base_test.py [moved from qtip/loader/module.py with 60% similarity]
tests/unit/collector/grep_test.py [new file with mode: 0644]
tests/unit/collector/logfile_test.py [new file with mode: 0644]
tests/unit/loader/plan_test.py

index e69de29..909703e 100644 (file)
@@ -0,0 +1,19 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corp and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+
+class BaseActor(object):
+    """abstract actor class"""
+
+    def __init__(self, config, parent=None):
+        self._config = config
+        self._parent = parent
+
+    def get_config(self, key, default=None):
+        return self._config.get(key, default)
index 100ec00..09c635a 100644 (file)
@@ -35,6 +35,7 @@ class BaseProp(object):
     # content
     DESCRIPTION = 'description'
     WORKLOADS = 'workloads'
+    TYPE = 'type'
 
 
 class SpecProp(BaseProp):
index d2fbd58..aa4df1f 100644 (file)
@@ -27,7 +27,7 @@ def cmd_list(ctx):
     pass
 
 
-@cli.command('run', help='Run tests to collect Performance Metrics')
+@cli.command('run', help='Run tests to run Performance Metrics')
 @click.argument('name')
 @pass_context
 def cmd_run(ctx, name):
index e69de29..cc957ba 100644 (file)
@@ -0,0 +1,25 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corp and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+
+from qtip.base.constant import BaseProp
+from qtip.collector.parser.grep import GrepParser
+
+
+class CollectorProp(BaseProp):
+    TYPE = 'type'
+    PARSERS = 'parsers'
+    PATHS = 'paths'
+
+
+def load_parser(type_name):
+    if type_name == GrepParser.TYPE:
+        return GrepParser
+    else:
+        raise Exception("Invalid parser type: {}".format(type_name))
index 6ed5aaf..2c2e532 100644 (file)
@@ -7,36 +7,49 @@
 # http://www.apache.org/licenses/LICENSE-2.0
 ##############################################################################
 
-from base import BaseCollector
+from itertools import chain
+from six.moves import reduce
+import os
 
-from qtip.collector.base import CollectorProp as CProp
+from qtip.base import BaseActor
+from qtip.collector import load_parser
+from qtip.collector import CollectorProp as CProp
 from qtip.loader.file import FileLoader
 
 
-class LogfileCollector(BaseCollector):
-    """collect performance metrics from log files"""
+class LogItem(BaseActor):
+    def find(self, filename, paths=None):
+        return self._parent.find(filename, paths)
 
+
+class LogfileCollector(BaseActor):
+    """run performance metrics from log files"""
     TYPE = 'logfile'
+    LOGS = 'logs'
+    PATHS = 'paths'
 
     def __init__(self, config, parent=None):
         super(LogfileCollector, self).__init__(config)
-        paths = [config[CProp.PATHS]] if CProp.PATHS in config else ['.']
-        self.loader = FileLoader('.', paths)
-        self._parent = parent
-
-    def collect(self):
-        captured = {}
-        for item in self._config[CProp.LOGS]:
-            captured.update(self._parse_log(item))
-        return captured
-
-    def _parse_log(self, log_item):
-        captured = {}
-        # TODO(yujunz) select parser by name
-        if CProp.GREP in log_item:
-            for rule in log_item[CProp.GREP]:
-                captured.update(self._grep(log_item[CProp.FILENAME], rule))
-        return captured
-
-    def _grep(self, filename, rule):
-        return {}
+        self._parent = parent  # plan
+        # TODO(yujunz) handle exception of invalid parent
+        dirname = os.path.dirname(self._parent.abspath)
+        paths = [os.path.join(dirname, p) for p in config.get(self.PATHS, [])]
+        self._loader = FileLoader('.', paths)
+
+    def run(self):
+        collected = []
+        for log_item_config in self._config[self.LOGS]:
+            log_item = LogItem(log_item_config, self)
+            matches = [load_parser(c[CProp.TYPE])(c, log_item).run()
+                       for c in log_item.get_config(CProp.PARSERS)]
+            collected = chain(collected, reduce(chain, matches))
+        return reduce(merge_matchobj_to_dict, collected, {'groups': (), 'groupdict': {}})
+
+    def find(self, filename, paths=None):
+        return self._loader.find(filename, paths)
+
+
+def merge_matchobj_to_dict(d, m):
+    d['groups'] = chain(d['groups'], m.groups())
+    d['groupdict'].update(m.groupdict())
+    return d
diff --git a/qtip/collector/parser/__init__.py b/qtip/collector/parser/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
similarity index 50%
rename from qtip/collector/base.py
rename to qtip/collector/parser/grep.py
index e7f9756..d7ada48 100644 (file)
@@ -1,5 +1,5 @@
 ##############################################################################
-# Copyright (c) 2016 ZTE Corp and others.
+# Copyright (c) 2017 ZTE Corp and others.
 #
 # All rights reserved. This program and the accompanying materials
 # are made available under the terms of the Apache License, Version 2.0
@@ -8,20 +8,26 @@
 ##############################################################################
 
 
-from qtip.base.constant import BaseProp
+import re
 
 
-class BaseCollector(object):
-    """performance metrics collector"""
-    def __init__(self, config):
-        self._config = config
+from qtip.base.constant import BaseProp
+from qtip.base import BaseActor
 
 
-class CollectorProp(BaseProp):
-    TYPE = 'type'
-    LOGS = 'logs'
+class GrepProp(BaseProp):
     FILENAME = 'filename'
-    GREP = 'grep'
     REGEX = 'regex'
-    CAPTURE = 'capture'
-    PATHS = 'path'
+
+
+class GrepParser(BaseActor):
+    TYPE = 'grep'
+
+    def run(self):
+        filename = self._parent.get_config(GrepProp.FILENAME)
+        return grep_in_file(self._parent.find(filename), self._config[GrepProp.REGEX])
+
+
+def grep_in_file(filename, regex):
+    with open(filename, 'r') as f:
+        return filter(lambda x: x is not None, [re.search(regex, line) for line in f])
index 00f9481..0ea4d5b 100644 (file)
@@ -25,12 +25,12 @@ class FileLoader(BaseLoader):
     _paths = [ROOT_DIR]
 
     def __init__(self, name, paths=None):
-        self._file = name
-        self._abspath = self._find(name, paths=paths)
+        self._filename = name
+        self.abspath = self.find(name, paths=paths)
 
-    def _find(self, name, paths=None):
+    def find(self, name, paths=None):
         """find a specification in searching paths"""
-        paths = self._paths if paths is None else paths
+        paths = [self.abspath] if paths is None else paths
         for p in paths:
             abspath = path.join(p, self.RELATIVE_PATH, name)
             if path.exists(abspath):
@@ -47,4 +47,4 @@ class FileLoader(BaseLoader):
             item = cls(name, paths=paths)
             yield {
                 BaseProp.NAME: name,
-                BaseProp.ABSPATH: item._abspath}
+                BaseProp.ABSPATH: item.abspath}
index 0fd9ff5..e15651a 100644 (file)
@@ -9,12 +9,20 @@
 
 
 from qtip.base.constant import BaseProp
-from qtip.collector.base import CollectorProp as CProp
-from qtip.loader.module import load_collector
+from qtip.collector import CollectorProp as CProp
+from qtip.collector.logfile import LogfileCollector
 from qtip.loader.yaml_file import YamlFileLoader
 from qtip.loader.qpi import QPISpec
 
 
+# TODO(yujunz) more elegant way to load module dynamically
+def load_collector(type_name):
+    if type_name == LogfileCollector.TYPE:
+        return LogfileCollector
+    else:
+        raise Exception("Invalid collector type: {}".format(type_name))
+
+
 class Plan(YamlFileLoader):
     """
     a benchmark plan is consist of configuration and a QPI list
index ccaee8d..8b78a47 100644 (file)
@@ -20,9 +20,11 @@ class YamlFileLoader(FileLoader):
 
     def __init__(self, name, paths=None):
         super(YamlFileLoader, self).__init__(name, paths)
-        with open(self._abspath, 'r') as stream:
+        abspath = self.abspath
+
+        with open(abspath, 'r') as stream:
             content = yaml.safe_load(stream)
             if not isinstance(content, dict):
-                raise InvalidContent(self._abspath)
+                raise InvalidContent(abspath)
             self.content = content
             self.name = content.get(BaseProp.NAME, path.splitext(name)[0])
index 1db8498..79c3885 100644 (file)
@@ -28,16 +28,16 @@ class Runner(object):
         if driver_name == 'random':
             self.driver = RandomDriver()
         else:
-            raise NotFound(driver_name, package=PkgName.DRIVER)
+            raise NotFound(driver_name, heystack=PkgName.DRIVER)
 
         if collector_name == 'stdout':
             self.collector = StdoutCollector()
         else:
             raise NotFound(collector_name,
-                           package=PkgName.COLLECTOR)
+                           heystack=PkgName.COLLECTOR)
 
         if reporter_name == 'console':
             self.reporter = ConsoleReporter()
         else:
             raise NotFound(reporter_name,
-                           package=PkgName.REPORTER)
+                           heystack=PkgName.REPORTER)
index 43efcfe..a508012 100644 (file)
@@ -2,10 +2,10 @@
 # of appearance. Changing the order has an impact on the overall integration
 # process, which may cause wedges in the gate later.
 
+tox
 pytest
+pytest-cov
+coverage
 pykwalify
 mock
 pip_check_reqs
-coverage
-pytest-cov
-tox
index 7acb75e..32042f2 100644 (file)
@@ -12,6 +12,7 @@ from os import path
 import pytest
 
 from qtip.loader.plan import Plan
+from qtip.loader.plan import PlanProp
 
 
 @pytest.fixture(scope='session')
@@ -26,4 +27,19 @@ def benchmarks_root(data_root):
 
 @pytest.fixture(scope='session')
 def plan(benchmarks_root):
-    return Plan('fake-plan.yaml', [benchmarks_root])
+    return Plan('doctor.yaml', [benchmarks_root])
+
+
+@pytest.fixture(scope='session')
+def plan_config(plan):
+    return plan.content[PlanProp.CONFIG]
+
+
+@pytest.fixture(scope='session')
+def collectors_config(plan_config):
+    return plan_config[PlanProp.COLLECTORS]
+
+
+@pytest.fixture(scope='session')
+def logfile_config(collectors_config):
+    return collectors_config[0]
index f884c60..f8dcf08 100644 (file)
@@ -4,34 +4,30 @@ info:
   facility: local
   engineer: local
 config:
-  driver: sample
   collectors:
     - type: logfile
+      paths:
+        - '../../external/doctor-verify-apex-sample-master'
       logs:
         - filename: doctor_consumer.log
-          # 2016-12-28 03:16:05,630 consumer.py 26 INFO   doctor consumer notified at 1482894965.63
-          grep:
-            - regex: 'doctor consumer notified at \d+(\.\d+)?$'
-              capture: notified consumer
+          parsers:
+            - type: grep
+              regex: 'doctor consumer notified at (?P<notified>\d+(?:\.\d+)?)$'
         - filename: doctor_inspector.log
-          # 2016-12-28 03:16:05,299 inspector.py 76 INFO   event posted at 1482894965.3
-          # 2016-12-28 03:16:05,299 inspector.py 56 INFO   doctor mark vm(<Server: doctor_vm1>) error at 1482894965.3
-          # 2016-12-28 03:16:05,506 inspector.py 66 INFO   doctor mark host(overcloud-novacompute-1.ool-virtual1) down at 1482894965.51
-          grep:
-            - regex: 'event posted at \d+(\.\d+)?$'
-              capture: posted event
-            - regex: 'doctor mark vm\(.*\) error at \d+(\.\d+)?$'
-              capture: marked VM error
-            - regex: 'doctor mark host\(.*\) down at \d+(\.\d+)?$'
-              capture: marked host down
+          parsers:
+            - type: grep
+              regex: 'event posted at (?P<event_posted>\d+(?:\.\d+)?)$'
+            - type: grep
+              regex: 'doctor mark vm\(.*\) error at (?P<vm_error>\d+(?:\.\d+)?)$'
+            - type: grep
+              regex: 'doctor mark host\(.*\) down at (?P<host_down>\d+(?:\.\d+)?)$'
         - filename: disable_network.log
-          # doctor set host down at 1482894965.164096803
-          grep:
-            - regex: 'doctor set host down at \d+(\.\d+)?$'
-              capture: set host down
-  reporter:
-    name: console
-    # transform collected data into timeline
-    transformer: timeline
+          parsers:
+            - type: grep
+              regex: 'doctor set host down at (?P<network_down>\d+(?:\.\d+)?)$'
+  reporters:
+    - type: console
+      # transform collected data into timeline
+      transformer: timeline
 QPIs:
-  - fake-qpi.yaml
+  - fake_qpi.yaml
diff --git a/tests/data/benchmarks/plan/fake-plan.yaml b/tests/data/benchmarks/plan/fake-plan.yaml
deleted file mode 100644 (file)
index 511affd..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-name: fake plan
-description: fake benchmark plan for demonstration and testing
-config:
-  facility: local
-  engineer: local
-  driver: sample
-  collectors: []
-  reporter: console
-QPIs:
-  - fake-qpi.yaml
diff --git a/tests/data/fake.log b/tests/data/fake.log
new file mode 100644 (file)
index 0000000..bab71e5
--- /dev/null
@@ -0,0 +1,9 @@
+Lorem ipsum dolor sit amet,
+consectetur adipiscing elit,
+sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+
+Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
+Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
diff --git a/tests/unit/collector/__init__.py b/tests/unit/collector/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
similarity index 60%
rename from qtip/loader/module.py
rename to tests/unit/collector/base_test.py
index 05cb1b7..17fe1af 100644 (file)
@@ -8,12 +8,11 @@
 ##############################################################################
 
 
-from qtip.collector.logfile import LogfileCollector
+from qtip.loader.plan import load_collector
+from qtip.collector import CollectorProp as CProp
 
 
-# TODO(yujunz) more elegant way to load module dynamically
-def load_collector(type_name):
-    if type_name == LogfileCollector.TYPE:
-        return LogfileCollector
-    else:
-        raise Exception("Invalid collector type: {}".format(type_name))
+def test_load_collector(collectors_config):
+    for c in collectors_config:
+        collector = load_collector(c[CProp.TYPE])
+        assert collector.TYPE == c[CProp.TYPE]
diff --git a/tests/unit/collector/grep_test.py b/tests/unit/collector/grep_test.py
new file mode 100644 (file)
index 0000000..e5d5f8c
--- /dev/null
@@ -0,0 +1,31 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corp and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+import os
+import pytest
+
+from qtip.collector.parser.grep import grep_in_file
+
+
+@pytest.fixture
+def logfile(data_root):
+    return os.path.join(data_root, 'fake.log')
+
+
+@pytest.mark.parametrize("regex,expected", [
+    ('not exist', []),
+    ('Lorem (\S+)', [{'groups': ('ipsum',), 'groupdict': {}}]),
+    ('nisi ut (?P<name>\S+)', [{'groups': ('aliquip',), 'groupdict': {'name': 'aliquip'}}])
+])
+def test_grep_in_file(logfile, regex, expected):
+    matches = grep_in_file(logfile, regex)
+    assert len(matches) == len(expected)
+    for i in range(len(matches)):
+        assert matches[i].groups() == expected[i]['groups']
+        assert matches[i].groupdict() == expected[i]['groupdict']
diff --git a/tests/unit/collector/logfile_test.py b/tests/unit/collector/logfile_test.py
new file mode 100644 (file)
index 0000000..a76aa3e
--- /dev/null
@@ -0,0 +1,33 @@
+##############################################################################
+# Copyright (c) 2017 ZTE Corp and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+import pytest
+
+from qtip.collector.logfile import LogfileCollector
+
+
+@pytest.fixture
+def logfile_collector(logfile_config, plan):
+    return LogfileCollector(logfile_config, plan)
+
+
+def test_run(logfile_collector):
+    collected = logfile_collector.run()
+    assert collected['groupdict'] == {
+        'event_posted': '1482894965.3',
+        'host_down': '1482894965.51',
+        'network_down': '1482894965.164096803',
+        'notified': '1482894965.63',
+        'vm_error': '1482894965.3'
+    }
+    assert list(collected['groups']) == ['1482894965.63',
+                                         '1482894965.3',
+                                         '1482894965.3',
+                                         '1482894965.51',
+                                         '1482894965.164096803']
index 81fd0bd..4872b4c 100644 (file)
@@ -9,13 +9,15 @@
 
 import pytest
 
+from qtip.collector.logfile import LogfileCollector
+from qtip.loader.plan import load_collector
 from qtip.loader.plan import Plan
 from qtip.loader.plan import PlanProp
 from qtip.loader.plan import QPISpec
 
 
 def test_init(plan):
-    assert plan.name == 'fake plan'
+    assert plan.name == 'doctor performance profiling'
     assert isinstance(plan.content, dict)
     for qpi in plan.qpis:
         assert isinstance(qpi, QPISpec)
@@ -28,7 +30,7 @@ def test_init(plan):
 
 def test_list_all(benchmarks_root):
     plan_list = Plan.list_all(paths=[benchmarks_root])
-    assert len(list(plan_list)) is 2
+    assert len(list(plan_list)) is 1
     for desc in plan_list:
         assert PlanProp.NAME in desc
         assert PlanProp.CONTENT in desc
@@ -42,3 +44,7 @@ def test_content(plan):
     assert PlanProp.DESCRIPTION in content
     assert PlanProp.CONFIG in content
     assert PlanProp.QPIS in content
+
+
+def test_load_collector():
+    assert load_collector(LogfileCollector.TYPE) is LogfileCollector