X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=monitor%2Fdashboard%2Fstats_overview.json;fp=monitor%2Fdashboard%2Fprototype_dashboard_collapsible.json;h=440bb5a2fcd9368bec25a5d6dd8d78fb80817ece;hb=8d5e2fff2f9800942cf7f2c8a15b648ba3c76a16;hp=58882361ee6905c11d416ffb934afb949f5eafb8;hpb=cb67313c1e14b6eca057bdfc7722bc11506b7a2a;p=bottlenecks.git diff --git a/monitor/dashboard/prototype_dashboard_collapsible.json b/monitor/dashboard/stats_overview.json similarity index 78% rename from monitor/dashboard/prototype_dashboard_collapsible.json rename to monitor/dashboard/stats_overview.json index 58882361..440bb5a2 100644 --- a/monitor/dashboard/prototype_dashboard_collapsible.json +++ b/monitor/dashboard/stats_overview.json @@ -2,40 +2,41 @@ "annotations": { "list": [] }, - "description": "A simple overview of the most important Docker host and container metrics. (Barometer/Prometheus)", + "description": "A monitoring dashboard for OPNFV long duration test", "editable": true, "gnetId": 893, "graphTooltip": 1, "hideControls": false, "id": null, "links": [], - "refresh": "15m", + "refresh": false, "rows": [ { "collapse": false, - "height": 193, + "height": -295, "panels": [ { - "content": "\"Prometheus\nPrometheus\n\n

You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the Grafana and Prometheus projects.

", + "content": "\"Monitoring\n\n

\nYou're using the monitoring suite developed by OPNFV Bottlenecks team for OPNFV long duration test.\nThe monitoring suite consists of a monitoring dashboard - \nGrafana ,\na monitoring data source -\nPrometheus and\nseveral monitoring tools -\nBarometer,\nCadvisor and\nNode.\nThe monitoring suite is evolving along with OPNFV releases and enriched features will be included based on practical needs.\n

", "editable": true, "error": false, + "height": "100", "id": 91, "links": [], "mode": "html", - "span": 6, + "span": 7, "style": {}, "title": "", "transparent": true, "type": "text" }, { - "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", + "content": "### OPNFV Long Duration Test\nThis is a collaborative initiative launched by OPNFV test working group (Testperf) for long duration stability test (soak test) in NFV realm.\nSeveral testing projects in OPNFV has been involved, i.e.,\n[Bottlenecks - system limit testing](https://wiki.opnfv.org/display/bottlenecks),\n[Functest - functional testing](https://wiki.opnfv.org/display/functest),\n[NFVbench -NFVI benchmark testing](https://wiki.opnfv.org/display/bottlenecks),\n[Storperf - storage testing](https://wiki.opnfv.org/display/bottlenecks),\n[VSperf - data path testing](https://wiki.opnfv.org/display/bottlenecks) and\n[Yardstick - performance testing](https://wiki.opnfv.org/display/bottlenecks).\nFor more detail, please refer to the [OPNFV LDT WIKI](https://wiki.opnfv.org/display/testing/Long+Duration+Testing).", "editable": true, "error": false, "id": 92, "links": [], "mode": "markdown", - "span": 6, + "span": 5, "style": {}, "title": "", "transparent": true, @@ -46,12 +47,12 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": false, - "title": "Prometheus Information", - "titleSize": "h6" + "title": "Dashboard Information", + "titleSize": "h5" }, { - "collapse": true, - "height": 239, + "collapse": false, + "height": 177, "panels": [ { "cacheTimeout": null, @@ -103,7 +104,7 @@ "to": "null" } ], - "span": 3, + "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, @@ -113,12 +114,13 @@ "tableColumn": "", "targets": [ { - "expr": "time() - node_boot_time{instance=~\"$server:.*\"}", + "expr": "time() - node_boot_time", + "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "", "refId": "A", - "step": 1800 + "step": 40 } ], "thresholds": "", @@ -182,7 +184,7 @@ "to": "null" } ], - "span": 3, + "span": 1, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, @@ -193,9 +195,10 @@ "targets": [ { "expr": "count(rate(container_last_seen{name=~\".+\"}[$interval]))", + "format": "time_series", "intervalFactor": 2, "refId": "A", - "step": 1800 + "step": 40 } ], "thresholds": "", @@ -260,7 +263,7 @@ "to": "null" } ], - "span": 3, + "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, @@ -270,11 +273,12 @@ "tableColumn": "", "targets": [ { - "expr": "min((node_filesystem_size{fstype=~\"xfs|ext4\",instance=~\"$server:.*\"} - node_filesystem_free{fstype=~\"xfs|ext4\",instance=~\"$server:.*\"} )/ node_filesystem_size{fstype=~\"xfs|ext4\",instance=~\"$server:.*\"})", + "expr": "min((node_filesystem_size{fstype=~\"xfs|ext4\"} - node_filesystem_free{fstype=~\"xfs|ext4\"} )/ node_filesystem_size{fstype=~\"xfs|ext4\"})", + "format": "time_series", "hide": false, "intervalFactor": 2, "refId": "A", - "step": 1800 + "step": 40 } ], "thresholds": "0.75, 0.90", @@ -339,7 +343,7 @@ "to": "null" } ], - "span": 3, + "span": 2, "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, @@ -349,10 +353,11 @@ "tableColumn": "", "targets": [ { - "expr": "((node_memory_MemTotal{instance=~\"$server:.*\"} - node_memory_MemAvailable{instance=~\"$server:.*\"}) / node_memory_MemTotal{instance=~\"$server:.*\"}) * 100", + "expr": "((node_memory_MemTotal - node_memory_MemAvailable) / node_memory_MemTotal) * 100", + "format": "time_series", "intervalFactor": 2, "refId": "A", - "step": 1800 + "step": 40 } ], "thresholds": "70, 90", @@ -367,22 +372,10 @@ } ], "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Container, Disk, Memory Statistics", - "titleSize": "h6" - }, - { - "collapse": true, - "height": 218, - "panels": [ + }, { "aliasColors": { - "SENT": "#BF1B00" + "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9" }, "bars": false, "dashLength": 10, @@ -392,7 +385,7 @@ "error": false, "fill": 1, "grid": {}, - "id": 19, + "id": 5, "legend": { "avg": false, "current": false, @@ -407,42 +400,76 @@ "links": [], "nullPointMode": "null as zero", "percentage": false, - "pointradius": 1, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "span": 3, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", + "expr": "sum(rate(container_cpu_system_seconds_total[1m]))", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "a", + "refId": "B", + "step": 120 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "nur container", + "refId": "F", + "step": 4 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))", + "format": "time_series", + "hide": true, + "interval": "", "intervalFactor": 2, - "legendFormat": "RECEIVED", + "legendFormat": "nur docker host", + "metric": "", "refId": "A", - "step": 600 + "step": 4 }, { - "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", + "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100", + "format": "time_series", "hide": false, + "interval": "", "intervalFactor": 2, - "legendFormat": "SENT", - "refId": "B", - "step": 600 + "legendFormat": "", + "metric": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Network Traffic", + "title": "CPU Usage", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, - "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -453,8 +480,8 @@ }, "yaxes": [ { - "format": "bytes", - "label": null, + "format": "percent", + "label": "", "logBase": 1, "max": null, "min": null, @@ -472,7 +499,8 @@ }, { "aliasColors": { - "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9" + "Available Memory": "#7EB26D", + "Unavailable Memory": "#7EB26D" }, "bars": false, "dashLength": 10, @@ -482,7 +510,7 @@ "error": false, "fill": 1, "grid": {}, - "id": 5, + "id": 38, "legend": { "avg": false, "current": false, @@ -502,65 +530,172 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "span": 3, + "span": 2, "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_cpu_system_seconds_total[1m]))", + "expr": "container_memory_rss{name=~\".+\"}", + "format": "time_series", "hide": true, "intervalFactor": 2, - "legendFormat": "a", + "legendFormat": "{{__name__}}", + "refId": "D", + "step": 20 + }, + { + "expr": "sum(container_memory_rss{name=~\".+\"})", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "A", + "step": 20 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", "refId": "B", - "step": 120 + "step": 20 }, { - "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))", + "expr": "container_memory_rss{id=\"/\"}", + "format": "time_series", "hide": true, - "interval": "", "intervalFactor": 2, - "legendFormat": "nur container", - "refId": "F", - "step": 10 + "legendFormat": "{{__name__}}", + "refId": "C", + "step": 20 }, { - "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))", + "expr": "sum(container_memory_rss)", + "format": "time_series", "hide": true, - "interval": "", "intervalFactor": 2, - "legendFormat": "nur docker host", - "metric": "", - "refId": "A", + "legendFormat": "{{__name__}}", + "refId": "E", "step": 20 }, { - "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100", + "expr": "node_memory_Buffers", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "node_memory_Dirty", + "refId": "N", + "step": 30 + }, + { + "expr": "node_memory_MemFree", + "format": "time_series", "hide": false, - "interval": "", "intervalFactor": 2, - "legendFormat": "host", - "metric": "", - "refId": "C", - "step": 600 + "legendFormat": "Free Memory", + "refId": "F", + "step": 20 }, { - "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))", + "expr": "node_memory_MemAvailable", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Available Memory", + "refId": "H", + "step": 20 + }, + { + "expr": "node_memory_MemTotal - node_memory_MemAvailable", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Unavailable Memory", + "refId": "G", + "step": 20 + }, + { + "expr": "node_memory_Inactive", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "I", + "step": 2 + }, + { + "expr": "node_memory_KernelStack", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "J", + "step": 30 + }, + { + "expr": "node_memory_Active", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "K", + "step": 30 + }, + { + "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Unknown", + "refId": "L", + "step": 40 + }, + { + "expr": "node_memory_MemFree + node_memory_Inactive ", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "M", + "step": 30 + }, + { + "expr": "container_memory_rss{name=~\".+\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "O", + "step": 30 + }, + { + "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable", + "format": "time_series", "hide": true, "intervalFactor": 2, "legendFormat": "", - "refId": "D", - "step": 120 + "refId": "P", + "step": 40 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 10000000000 } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Available Memory", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -572,11 +707,11 @@ }, "yaxes": [ { - "format": "percent", + "format": "bytes", "label": "", "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -589,6 +724,32 @@ } ] }, + { + "headings": true, + "id": 94, + "limit": 10, + "links": [], + "query": "", + "recent": true, + "search": false, + "span": 12, + "starred": true, + "tags": [], + "title": "Status Quick Access", + "type": "dashlist" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Nodes Overview - CPU, Network, Storage, Container Count", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 265, + "panels": [ { "alert": { "conditions": [ @@ -635,10 +796,12 @@ "fill": 1, "id": 28, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": false, "total": false, "values": false @@ -653,15 +816,26 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "span": 3, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "node_load1{instance=~\"$server:.*\"} / count by(job, instance)(count by(job, instance, cpu)(node_cpu{instance=~\"$server:.*\"}))", + "expr": "node_load1 / count by(job, instance)(count by(job, instance, cpu)(node_cpu))", + "format": "time_series", + "hide": false, "intervalFactor": 2, + "legendFormat": "", "refId": "A", - "step": 600 + "step": 10 + }, + { + "expr": "count by(exported_instance, job)(collectd_load_0{exported_instance=~\"host.\"}) / (count by(job, exported_instance)(count by(exported_instance, job, cpu)(collectd_cpu{exported_instance=~\"host.\"})))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 } ], "thresholds": [ @@ -675,7 +849,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Load", + "title": "CPU Loads on Nodes", "tooltip": { "msResolution": false, "shared": true, @@ -695,7 +869,7 @@ "format": "percentunit", "label": null, "logBase": 1, - "max": "1.50", + "max": "1", "min": null, "show": true }, @@ -710,45 +884,8 @@ ] }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 850000000000 - ], - "type": "gt" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Free/Used Disk Space alert", - "noDataState": "keep_state", - "notifications": [ - { - "id": 1 - } - ] - }, "aliasColors": { - "Belegete Festplatte": "#BF1B00", - "Free Disk Space": "#7EB26D", - "Used Disk Space": "#7EB26D", - "{}": "#BF1B00" + "SENT": "#BF1B00" }, "bars": false, "dashLength": 10, @@ -758,7 +895,7 @@ "error": false, "fill": 1, "grid": {}, - "id": 13, + "id": 19, "legend": { "avg": false, "current": false, @@ -773,46 +910,145 @@ "links": [], "nullPointMode": "null as zero", "percentage": false, - "pointradius": 5, + "pointradius": 1, "points": false, "renderer": "flot", - "seriesOverrides": [ + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "Used Disk Space", - "yaxis": 1 + "expr": "sum(rate(container_network_transmit_bytes_total[$interval])) by (instance) - sum(rate(container_network_receive_bytes_total[$interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "(SENT - RECEIVED) {{instance}}", + "refId": "A", + "step": 10 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total[$interval])) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "SENT - {{instance}}", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(rate(container_network_receive_bytes_total[$interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RECEIVED - {{instance}}", + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Loads on Nodes", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "IN on /sda": "#7EB26D", + "OUT on /sda": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "automated-ds", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], "spaceLength": 10, - "span": 3, - "stack": true, + "span": 4, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "node_filesystem_size{fstype=\"aufs\"} - node_filesystem_free{fstype=\"aufs\"}", + "expr": "-sum(rate(node_disk_bytes_read[$interval])) by (device)", + "format": "time_series", "hide": false, "intervalFactor": 2, - "legendFormat": "Used Disk Space", + "legendFormat": "OUT on /{{device}}", + "metric": "node_disk_bytes_read", "refId": "A", - "step": 600 - } - ], - "thresholds": [ + "step": 10 + }, { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 850000000000 + "expr": "sum(rate(node_disk_bytes_written[$interval])) by (device)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IN on /{{device}}", + "metric": "", + "refId": "B", + "step": 10 } ], + "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Used Disk Space", + "title": "Disk I/O", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -824,11 +1060,11 @@ }, "yaxes": [ { - "format": "bytes", - "label": "", + "format": "Bps", + "label": null, "logBase": 1, - "max": 1000000000000, - "min": 0, + "max": null, + "min": null, "show": true }, { @@ -845,13 +1081,13 @@ "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Network, CPU, Load Statistics", - "titleSize": "h6" + "showTitle": true, + "title": "Loads on Nodes - CPU, Network, Disk Loads Records", + "titleSize": "h5" }, { "collapse": true, - "height": 249, + "height": 281, "panels": [ { "aliasColors": {}, @@ -861,9 +1097,9 @@ "datasource": "automated-ds", "editable": true, "error": false, - "fill": 5, + "fill": 1, "grid": {}, - "id": 1, + "id": 8, "legend": { "alignAsTable": true, "avg": false, @@ -876,7 +1112,7 @@ "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", "percentage": false, @@ -885,31 +1121,37 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "span": 8, - "stack": true, + "span": 6, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[$interval])) by (name) * 100", - "hide": false, - "interval": "", + "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[$interval])) by (name)", "intervalFactor": 2, "legendFormat": "{{name}}", - "metric": "", - "refId": "F", - "step": 240 + "refId": "A", + "step": 4 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "CPU Usage per Container", + "title": "Received Network Traffic per Container", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, + "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -920,10 +1162,11 @@ }, "yaxes": [ { - "format": "percent", - "label": "", + "format": "Bps", + "label": null, "logBase": 1, "max": null, + "min": null, "show": true }, { @@ -932,15 +1175,12 @@ "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ] }, { - "aliasColors": { - "IN on /sda": "#7EB26D", - "OUT on /sda": "#890F02" - }, + "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, @@ -949,18 +1189,22 @@ "error": false, "fill": 1, "grid": {}, - "id": 3, + "id": 9, "legend": { + "alignAsTable": true, "avg": false, "current": false, + "hideEmpty": false, + "hideZero": false, "max": false, "min": false, - "show": false, + "rightSide": true, + "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", "percentage": false, @@ -969,50 +1213,51 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "span": 2, + "span": 6, "stack": false, "steppedLine": false, "targets": [ { - "expr": "-sum(rate(node_disk_bytes_read[$interval])) by (device)", - "hide": false, + "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])) by (name)", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "OUT on /{{device}}", - "metric": "node_disk_bytes_read", + "legendFormat": "{{name}}", "refId": "A", - "step": 600 + "step": 4 }, { - "expr": "sum(rate(node_disk_bytes_written[$interval])) by (device)", + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "format": "time_series", + "hide": true, "intervalFactor": 2, - "legendFormat": "IN on /{{device}}", - "metric": "", + "legendFormat": "", "refId": "B", - "step": 600 + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Disk I/O", + "title": "Sent Network Traffic per Container", "tooltip": { "msResolution": true, "shared": true, "sort": 0, "value_type": "cumulative" }, + "transparent": false, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, - "show": false, + "show": true, "values": [] }, "yaxes": [ { "format": "Bps", - "label": null, + "label": "", "logBase": 1, "max": null, "min": null, @@ -1020,18 +1265,54 @@ }, { "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, + "label": "", + "logBase": 10, + "max": 8, + "min": 0, "show": false } ] }, { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 850000000000 + ], + "type": "gt" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Free/Used Disk Space alert", + "noDataState": "keep_state", + "notifications": [ + { + "id": 1 + } + ] + }, "aliasColors": { - "Available Memory": "#7EB26D", - "Unavailable Memory": "#7EB26D" + "Belegete Festplatte": "#BF1B00", + "Free Disk Space": "#7EB26D", + "Used Disk Space": "#7EB26D", + "{}": "#BF1B00" }, "bars": false, "dashLength": 10, @@ -1041,7 +1322,7 @@ "error": false, "fill": 1, "grid": {}, - "id": 38, + "id": 13, "legend": { "avg": false, "current": false, @@ -1059,139 +1340,25 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Used Disk Space", + "yaxis": 1 + } + ], "spaceLength": 10, - "span": 2, + "span": 3, "stack": true, "steppedLine": false, "targets": [ { - "expr": "container_memory_rss{name=~\".+\"}", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "D", - "step": 20 - }, - { - "expr": "sum(container_memory_rss{name=~\".+\"})", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "A", - "step": 20 - }, - { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 20 - }, - { - "expr": "container_memory_rss{id=\"/\"}", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "C", - "step": 20 - }, - { - "expr": "sum(container_memory_rss)", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "E", - "step": 20 - }, - { - "expr": "node_memory_Buffers", - "hide": true, - "intervalFactor": 2, - "legendFormat": "node_memory_Dirty", - "refId": "N", - "step": 30 - }, - { - "expr": "node_memory_MemFree", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "F", - "step": 20 - }, - { - "expr": "node_memory_MemAvailable", - "hide": true, - "intervalFactor": 2, - "legendFormat": "Available Memory", - "refId": "H", - "step": 20 - }, - { - "expr": "node_memory_MemTotal - node_memory_MemAvailable", + "expr": "node_filesystem_size{fstype=\"aufs\"} - node_filesystem_free{fstype=\"aufs\"}", + "format": "time_series", "hide": false, "intervalFactor": 2, - "legendFormat": "Unavailable Memory", - "refId": "G", - "step": 600 - }, - { - "expr": "node_memory_Inactive", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "I", - "step": 30 - }, - { - "expr": "node_memory_KernelStack", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "J", - "step": 30 - }, - { - "expr": "node_memory_Active", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "K", - "step": 30 - }, - { - "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)", - "hide": true, - "intervalFactor": 2, - "legendFormat": "Unknown", - "refId": "L", - "step": 40 - }, - { - "expr": "node_memory_MemFree + node_memory_Inactive ", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "M", - "step": 30 - }, - { - "expr": "container_memory_rss{name=~\".+\"}", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "O", - "step": 30 - }, - { - "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable", - "hide": true, - "intervalFactor": 2, "legendFormat": "", - "refId": "P", - "step": 40 + "refId": "A", + "step": 10 } ], "thresholds": [ @@ -1200,12 +1367,12 @@ "fill": true, "line": true, "op": "gt", - "value": 10000000000 + "value": 850000000000 } ], "timeFrom": null, "timeShift": null, - "title": "Available Memory", + "title": "Used Disk Space", "tooltip": { "msResolution": true, "shared": true, @@ -1222,11 +1389,95 @@ }, "yaxes": [ { - "format": "bytes", + "format": "bytes", + "label": "", + "logBase": 1, + "max": 1000000000000, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "automated-ds", + "editable": true, + "error": false, + "fill": 5, + "grid": {}, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[$interval])) by (name) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "F", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", "label": "", "logBase": 1, - "max": 16000000000, - "min": 0, + "max": null, "show": true }, { @@ -1238,19 +1489,7 @@ "show": false } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Container-wise CPU usage, Disk I/O, Memory Available", - "titleSize": "h6" - }, - { - "collapse": true, - "height": 251, - "panels": [ + }, { "aliasColors": {}, "bars": false, @@ -1259,9 +1498,9 @@ "datasource": "automated-ds", "editable": true, "error": false, - "fill": 1, + "fill": 3, "grid": {}, - "id": 8, + "id": 10, "legend": { "alignAsTable": true, "avg": false, @@ -1284,36 +1523,36 @@ "seriesOverrides": [], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[$interval])) by (name)", + "expr": "sum(container_memory_rss{name=~\".+\"}) by (name)", + "hide": false, "intervalFactor": 2, "legendFormat": "{{name}}", "refId": "A", - "step": 240 + "step": 4 }, { - "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", + "expr": "container_memory_usage_bytes{name=~\".+\"}", "hide": true, "intervalFactor": 2, "legendFormat": "{{name}}", "refId": "B", - "step": 10 + "step": 240 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Received Network Traffic per Container", + "title": "Memory Usage per Container", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, - "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -1324,8 +1563,8 @@ }, "yaxes": [ { - "format": "Bps", - "label": null, + "format": "bytes", + "label": "", "logBase": 1, "max": null, "min": null, @@ -1349,15 +1588,13 @@ "datasource": "automated-ds", "editable": true, "error": false, - "fill": 1, + "fill": 3, "grid": {}, - "id": 9, + "id": 34, "legend": { "alignAsTable": true, "avg": false, "current": false, - "hideEmpty": false, - "hideZero": false, "max": false, "min": false, "rightSide": true, @@ -1376,36 +1613,38 @@ "seriesOverrides": [], "spaceLength": 10, "span": 6, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])) by (name)", + "expr": "sum(container_memory_swap{name=~\".+\"}) by (name)", + "format": "time_series", + "hide": false, "intervalFactor": 2, "legendFormat": "{{name}}", "refId": "A", - "step": 240 + "step": 4 }, { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "format": "time_series", "hide": true, "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{name}}", "refId": "B", - "step": 10 + "step": 240 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Sent Network Traffic per Container", + "title": "Memory Swap per Container", "tooltip": { "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, - "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -1416,7 +1655,7 @@ }, "yaxes": [ { - "format": "Bps", + "format": "bytes", "label": "", "logBase": 1, "max": null, @@ -1425,37 +1664,22 @@ }, { "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, - "show": false + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Network Traffic", - "titleSize": "h6" - }, - { - "collapse": true, - "height": 250, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "automated-ds", - "editable": true, - "error": false, - "fill": 3, - "grid": {}, - "id": 10, + "fill": 1, + "id": 95, "legend": { "alignAsTable": true, "avg": false, @@ -1468,9 +1692,9 @@ "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -1478,32 +1702,40 @@ "seriesOverrides": [], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{name=~\".+\"}) by (name)", - "hide": false, + "expr": "count by(device)(container_fs_io_current)", + "format": "time_series", + "hide": true, "intervalFactor": 2, - "legendFormat": "{{name}}", "refId": "A", - "step": 240 + "step": 120 }, { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "hide": true, + "expr": "container_fs_read_seconds_total{name=~\".+\"}", + "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{name}}", + "legendFormat": "Read - {{name}}", "refId": "B", - "step": 240 + "step": 4 + }, + { + "expr": "-container_fs_write_seconds_total{name=~\".+\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Write - {{name}}", + "refId": "C", + "step": 4 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memory Usage per Container", + "title": "Container IO Seconds Total", "tooltip": { - "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" @@ -1518,8 +1750,8 @@ }, "yaxes": [ { - "format": "bytes", - "label": "", + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -1541,11 +1773,8 @@ "dashLength": 10, "dashes": false, "datasource": "automated-ds", - "editable": true, - "error": false, - "fill": 3, - "grid": {}, - "id": 34, + "fill": 1, + "id": 96, "legend": { "alignAsTable": true, "avg": false, @@ -1558,9 +1787,9 @@ "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -1568,32 +1797,43 @@ "seriesOverrides": [], "spaceLength": 10, "span": 6, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_swap{name=~\".+\"}) by (name)", + "expr": "container_fs_io_current{name=~\".+\"}", + "format": "time_series", "hide": false, + "interval": "", "intervalFactor": 2, "legendFormat": "{{name}}", "refId": "A", - "step": 240 + "step": 4 }, { - "expr": "container_memory_usage_bytes{name=~\".+\"}", + "expr": "container_fs_read_seconds_total{name=~\".+\"}", + "format": "time_series", "hide": true, "intervalFactor": 2, - "legendFormat": "{{name}}", + "legendFormat": "Read - {{name}}", "refId": "B", "step": 240 + }, + { + "expr": "container_fs_write_seconds_total{name=~\".+\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Write - {{name}}", + "refId": "C", + "step": 240 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Memory Swap per Container", + "title": "Container IO Current", "tooltip": { - "msResolution": true, "shared": true, "sort": 0, "value_type": "individual" @@ -1608,8 +1848,8 @@ }, "yaxes": [ { - "format": "bytes", - "label": "", + "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -1629,34 +1869,34 @@ "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Container Memory Statistics", - "titleSize": "h6" + "showTitle": true, + "title": "Containers on Nodes - CPU, Traffic, Memory Usage", + "titleSize": "h5" }, { "collapse": true, - "height": 97, + "height": null, "panels": [ { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], + "columns": [], + "datasource": "automated-ds", "editable": true, "error": false, + "filterNull": false, "fontSize": "100%", - "id": 37, + "height": "400", + "hideTimeOverride": false, + "id": 93, "links": [], - "pageSize": null, + "pageSize": 100, + "repeat": null, "scroll": true, "showHeader": true, "sort": { "col": 0, - "desc": true + "desc": false }, - "span": 4, + "span": 9, "styles": [ { "colorMode": null, @@ -1667,123 +1907,64 @@ ], "decimals": 2, "pattern": "/.*/", - "thresholds": [ - "10000000", - " 25000000" - ], + "thresholds": [], "type": "number", - "unit": "decbytes" + "unit": "short" } ], "targets": [ { - "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ", - "hide": true, + "expr": "cadvisor_version_info", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "", + "legendFormat": "{{instance}} - cAdvisor Version: {{cadvisorVersion}} - Docker Version: {{dockerVersion}} - OS Version: {{osVersion}} - Host Kernel Version: {{kernelVersion}}", "refId": "A", - "step": 240 + "step": 4 }, { - "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", - "hide": true, + "expr": "prometheus_build_info", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{name}}", + "legendFormat": "{{instance}} - Prometheus Version: {{version}} - Go Version: {{goversion}} - Prometheus Branch: {{branch}}", "refId": "B", - "step": 240 + "step": 4 }, { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "hide": false, + "expr": "node_exporter_build_info", + "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{name}}", + "legendFormat": "{{instance}} - Node-Exporter Version: {{version}} - Go Version: {{goversion}} - Node Exporter Branch: {{branch}}", "refId": "C", - "step": 240 - } - ], - "title": "Usage memory", - "transform": "timeseries_aggregations", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "editable": true, - "error": false, - "fontSize": "100%", - "id": 35, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": true - }, - "span": 4, - "styles": [ - { - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - "80", - "90" - ], - "type": "number", - "unit": "percent" - } - ], - "targets": [ - { - "expr": "sum(100 - ((container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) * 100 / container_spec_memory_limit_bytes{name=~\".+\"}) ) by (name) ", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "", - "refId": "A", - "step": 240 - }, - { - "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 240 + "step": 4 }, { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "hide": true, + "expr": "collectd_exporter_build_info", + "format": "time_series", + "hide": false, "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "C", - "step": 240 + "legendFormat": "{{instance}} - Collectd Exporter Version: {{version}} - Go Version: {{goversion}} - Collectd Exporter Branch: {{branch}} ", + "refId": "G", + "step": 4 } ], - "title": "Remaining memory", + "timeFrom": null, + "timeShift": null, + "title": "Versions", "transform": "timeseries_aggregations", "type": "table" }, { "columns": [ { - "text": "Current", - "value": "current" + "text": "Avg", + "value": "avg" } ], + "datasource": "automated-ds", "editable": true, "error": false, "fontSize": "100%", + "height": "400", "id": 36, "links": [], "pageSize": null, @@ -1793,7 +1974,7 @@ "col": 0, "desc": true }, - "span": 4, + "span": 3, "styles": [ { "colorMode": null, @@ -1815,6 +1996,7 @@ "targets": [ { "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ", + "format": "time_series", "hide": true, "intervalFactor": 2, "legendFormat": "{{name}}", @@ -1824,14 +2006,16 @@ }, { "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", + "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{name}}", "refId": "B", - "step": 240 + "step": 10 }, { "expr": "container_memory_usage_bytes{name=~\".+\"}", + "format": "time_series", "hide": true, "intervalFactor": 2, "legendFormat": "{{name}}", @@ -1847,9 +2031,42 @@ "repeat": null, "repeatIteration": null, "repeatRowId": null, - "showTitle": false, - "title": "Memory Usage", - "titleSize": "h6" + "showTitle": true, + "title": "Versions - Host OS, Docker, APP, Build Information", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OVS Bridges on Nodes - TODO", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "VMs on Nodes - TODO", + "titleSize": "h5" + }, + { + "collapse": true, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Traffic Generators Status - TODO", + "titleSize": "h5" } ], "schemaVersion": 14, @@ -1983,8 +2200,8 @@ { "allValue": null, "current": { - "text": "192.168.114.2", - "value": "192.168.114.2" + "text": null, + "value": null }, "datasource": "automated-ds", "hide": 0, @@ -2006,7 +2223,7 @@ ] }, "time": { - "from": "now-24h", + "from": "now-30m", "to": "now" }, "timepicker": { @@ -2035,6 +2252,6 @@ ] }, "timezone": "browser", - "title": "Collapsibe System Monitor using Prometheus", - "version": 3 -} + "title": "Stats Overview", + "version": 2 +} \ No newline at end of file