Initial commit for monitoring by prometheus 45/53845/7
author QiLiang <liangqi1@huawei.com>
Thu, 15 Mar 2018 04:20:59 +0000 (12:20 +0800)
committer QiLiang <liangqi1@huawei.com>
Thu, 29 Mar 2018 09:02:36 +0000 (17:02 +0800)
- install prometheus
- validate the installation
- add prometheus query function
- TODO: test collecting telemetry data from istio

JIRA: CLOVER-7

Change-Id: I983be2db78c8c5c20c0acee9ae81e891884e07fb
Signed-off-by: QiLiang <liangqi1@huawei.com>
clover/monitoring/monitoring.py [new file with mode: 0644]
clover/monitoring/validate.py [new file with mode: 0644]
docs/monitoring.rst [new file with mode: 0644]

diff --git a/clover/monitoring/monitoring.py b/clover/monitoring/monitoring.py
new file mode 100644 (file)
index 0000000..9726fd1
--- /dev/null
@@ -0,0 +1,140 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+from datetime import timedelta
+import pprint
+import requests
+import time
+
+# Base URL of the prometheus server; main() below passes it to Monitoring.
+PROMETHEUS_URL = "http://127.0.0.1:9090"
+
+
+class Monitoring(object):
+    """Small client for the prometheus HTTP API (targets and queries).
+
+    ``host`` is the base URL that the API paths below are appended to,
+    for example ``http://127.0.0.1:9090``.
+    """
+
+    PROMETHEUS_HEALTH_UP = "up"
+    # Job labels that must all appear among the active targets.
+    PROMETHEUS_ISTIO_TARGETS = {
+        "envoy",
+        "istio-mesh",
+        "kubernetes-apiservers",
+        "kubernetes-cadvisor",
+        "kubernetes-nodes",
+        "kubernetes-service-endpoints",
+        "mixer",
+        "pilot",
+    }
+    PROMETHEUS_API_TARGETS = "/api/v1/targets"
+    PROMETHEUS_API_QUERY = "/api/v1/query"
+    PROMETHEUS_API_QUERY_RANGE = "/api/v1/query_range"
+    # Seconds to wait for prometheus before giving up; without a timeout
+    # requests.get() may block forever on an unresponsive server.
+    PROMETHEUS_REQUEST_TIMEOUT = 30
+
+    def __init__(self, host):
+        self.host = host
+
+    def get_targets(self):
+        """Fetch the target list from prometheus.
+
+        Returns the decoded JSON body of the targets API, or ``False``
+        (after printing an error) when the request fails or the status
+        code is not 200.
+        """
+        url = '%s%s' % (self.host, Monitoring.PROMETHEUS_API_TARGETS)
+        try:
+            # Reference api: https://prometheus.io/docs/prometheus/latest/querying/api/#targets
+            response = requests.get(
+                url, timeout=Monitoring.PROMETHEUS_REQUEST_TIMEOUT)
+        except Exception as e:
+            print("ERROR: Cannot connect to prometheus\n%s" % e)
+            return False
+
+        if response.status_code != 200:
+            print("ERROR: get targets status code: %r" % response.status_code)
+            return False
+
+        return response.json()
+
+    def is_targets_healthy(self):
+        """Return True when all active targets are up and none is missing.
+
+        Returns ``False`` (after printing an error) when the targets
+        cannot be fetched, when any active target reports a health other
+        than ``PROMETHEUS_HEALTH_UP``, or when a job listed in
+        ``PROMETHEUS_ISTIO_TARGETS`` is absent from the active targets.
+        """
+        raw_targets = self.get_targets()
+        # get_targets() signals failure with the literal False.
+        if raw_targets is False:
+            return False
+
+        targets = set()
+        for target in raw_targets["data"]["activeTargets"]:
+            if target["health"] != Monitoring.PROMETHEUS_HEALTH_UP:
+                print("ERROR: target unhealth job: %s, health: %s" %
+                      (target["labels"]["job"], target["health"]))
+                return False
+            targets.add(target["labels"]["job"])
+
+        missing = Monitoring.PROMETHEUS_ISTIO_TARGETS - targets
+        if missing:
+            print("ERROR: targets %r not found!" % missing)
+            return False
+
+        return True
+
+    # Reference links:
+    #     - https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
+    #     - https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
+    #     - https://github.com/prometheus/prombench/blob/master/apps/load-generator/main.py
+    def query(self, query_params):
+        """Run an instant or range query and pretty-print the response.
+
+        ``query_params`` is a dict with the keys:
+
+        - ``"query"``: expression sent as the ``query`` parameter (required)
+        - ``"type"``: ``"instant"`` (default) or ``"range"``
+        - ``"start"`` / ``"end"``: range queries only; how far back from
+          the current time the window starts/ends, as a duration string
+          accepted by ``duration_seconds`` (default ``"0h"``)
+        - ``"step"``: range queries only (default ``"15s"``)
+
+        Returns the decoded JSON response, or ``None`` (after printing an
+        error) when the query type is unknown or the request fails.
+        """
+        # Bind url before the try block so the error handler can always
+        # format its message, even when building the request fails early.
+        url = self.host
+        try:
+            start = time.time()
+
+            query_type = query_params.get("type", "instant")
+            params = {"query": query_params["query"]}
+            if query_type == "instant":
+                url = "%s%s" % (self.host, Monitoring.PROMETHEUS_API_QUERY)
+            elif query_type == "range":
+                url = "%s%s" % (self.host, Monitoring.PROMETHEUS_API_QUERY_RANGE)
+                params["start"] = start - duration_seconds(query_params.get("start", "0h"))
+                params["end"] = start - duration_seconds(query_params.get("end", "0h"))
+                params["step"] = query_params.get("step", "15s")
+            else:
+                print("ERROR: invalid query type: %r" % query_type)
+                return None
+
+            resp = requests.get(url, params=params,
+                                timeout=Monitoring.PROMETHEUS_REQUEST_TIMEOUT)
+            dur = time.time() - start
+
+            print("query %s %s, status=%s, size=%d, dur=%.3f" %
+                  (self.host, query_params["query"], resp.status_code, len(resp.text), dur))
+            result = resp.json()
+            pprint.pprint(result, indent=2)
+            return result
+
+        except Exception as e:
+            print("ERROR: Could not query prometheus instance %s. \n %s" % (url, e))
+            return None
+
+
+def duration_seconds(s):
+    """Convert a duration string such as "30s", "5m" or "2h" to seconds.
+
+    The last character selects the unit (``s``, ``m`` or ``h``) and the
+    characters before it must form an integer.
+
+    Returns the duration in seconds as a float.
+    Raises ValueError when the unit is unknown or the count is not an
+    integer.
+    """
+    units = {"s": "seconds", "m": "minutes", "h": "hours"}
+    unit = units.get(s[-1:])
+    if unit is None:
+        # Raising a plain string is itself a TypeError, which hid the
+        # intended message; raise a real exception instead.
+        raise ValueError("ERROR: unknown duration %s" % s)
+
+    return timedelta(**{unit: int(s[:-1])}).total_seconds()
+
+
+def main():
+    """Report target health, then run one instant and one range query."""
+    m = Monitoring(PROMETHEUS_URL)
+    if not m.is_targets_healthy():
+        print("ERROR: Prometheus targets is unhealthy!")
+    else:
+        print("Prometheus targets are all healthy!")
+
+    sample_query = "istio_double_request_count{destination='details.default.svc.cluster.local'}"
+
+    # print() with a single argument behaves the same under Python 2 and 3.
+    print("\n### query instant")
+    query_params = {
+        "type": "instant",
+        "query": sample_query,
+    }
+    m.query(query_params)
+
+    print("\n### query range")
+    query_range_param = {
+        "type": "range",
+        "query": sample_query,
+        "start": "5m",
+        "end": "3m",
+        "step": "30s",
+    }
+    m.query(query_range_param)
+
+
+# Run the health check and the sample queries when executed as a script.
+if __name__ == '__main__':
+    main()
+
diff --git a/clover/monitoring/validate.py b/clover/monitoring/validate.py
new file mode 100644 (file)
index 0000000..fafe5df
--- /dev/null
@@ -0,0 +1,70 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+from monitoring import Monitoring
+from kubernetes import client, config
+
+# Base URL handed to Monitoring by validateService().
+PROMETHEUS_URL = "http://127.0.0.1:9090"
+# Name of the deployment validateDeploy() looks for.
+PROMETHEUS_DEPLOYMENT = "prometheus"
+# Label selector validateDeploy() uses to list the prometheus pod.
+PROMETHEUS_LABELS = "app=prometheus"
+# Namespace in which the deployment and the pod are expected.
+ISTIO_NAMESPACE = "istio-system"
+
+
+def validateDeploy():
+    """Check that the prometheus deployment exists and its pod is Running.
+
+    Loads the local kube config and then verifies, in order, that
+
+    1. a deployment named ``PROMETHEUS_DEPLOYMENT`` exists in
+       ``ISTIO_NAMESPACE``,
+    2. a pod matching ``PROMETHEUS_LABELS`` exists in that namespace,
+    3. that pod's phase is ``Running``.
+
+    Returns True when all checks pass; otherwise prints an error and
+    returns False.
+    """
+    config.load_kube_config()
+    appsv1 = client.AppsV1Api()
+    corev1 = client.CoreV1Api()
+
+    # check prometheus deployment
+    ret = appsv1.list_deployment_for_all_namespaces(watch=False)
+    found = any(PROMETHEUS_DEPLOYMENT == i.metadata.name and
+                ISTIO_NAMESPACE == i.metadata.namespace
+                for i in ret.items)
+    if not found:
+        print("ERROR: Deployment: {} doesn't present in {} namespace".format(
+                        PROMETHEUS_DEPLOYMENT, ISTIO_NAMESPACE))
+        return False
+
+    # find prometheus pod by label selector
+    ret = corev1.list_namespaced_pod(ISTIO_NAMESPACE, label_selector=PROMETHEUS_LABELS)
+    prom_pod_name = None
+    for i in ret.items:
+        # When several pods match, the last one listed is the one checked.
+        prom_pod_name = i.metadata.name
+    if prom_pod_name is None:
+        print("ERROR: prometheus pod not found")
+        return False
+
+    # check prometheus pod status
+    ret = corev1.read_namespaced_pod_status(prom_pod_name, ISTIO_NAMESPACE)
+    if ret.status.phase != "Running":
+        print("ERROR: prometheus pod %s is under %s state" % (prom_pod_name, ret.status.phase))
+        return False
+
+    return True
+
+
+def validateService():
+    """Return whether the prometheus targets at PROMETHEUS_URL are healthy."""
+    return Monitoring(PROMETHEUS_URL).is_targets_healthy()
+
+
+def main():
+    """Run the deployment check, then the service check, and print the outcome.
+
+    The service check is skipped when the deployment check fails.
+    Returns True when both checks pass, False otherwise.
+    """
+    if validateDeploy() and validateService():
+        # print() with a single argument behaves the same under Python 2 and 3.
+        print("Prometheus monitoring validation has passed")
+        return True
+
+    print("ERROR: Prometheus monitoring validation has failed")
+    return False
+
+
+# Run the validation when executed as a script.
+if __name__ == '__main__':
+    main()
+
diff --git a/docs/monitoring.rst b/docs/monitoring.rst
new file mode 100644 (file)
index 0000000..44b01e3
--- /dev/null
@@ -0,0 +1,31 @@
+##########
+Monitoring
+##########
+
+************
+Installation
+************
+
+Currently, we use the Istio built-in prometheus addon to install prometheus::
+
+    cd <istio-release-path>
+    kubectl apply -f install/kubernetes/addons/prometheus.yaml
+
+********
+Validate
+********
+
+Setup port-forwarding for prometheus by executing the following command::
+
+    kubectl -n istio-system port-forward $(kubectl -n istio-system get pod -l app=prometheus -o jsonpath='{.items[0].metadata.name}') 9090:9090 &
+
+Run the script in ``clover/monitoring`` to validate the prometheus installation::
+
+    python clover/monitoring/validate.py
+
+It validates the installation against the following criteria:
+
+#. [DONE] prometheus pod is in Running state
+#. [DONE] prometheus is connected to monitoring targets
+#. [TODO] test collecting telemetry data from istio
+#. [TODO] TBD