Merge AT&T WIP on modeled cloud-native stacks into Models 67/45367/1
authorBryan Sullivan <bryan.sullivan@att.com>
Tue, 17 Oct 2017 16:55:26 +0000 (09:55 -0700)
committerBryan Sullivan <bryan.sullivan@att.com>
Tue, 17 Oct 2017 16:55:26 +0000 (09:55 -0700)
Change-Id: I646825bf7d1a9c1be9c00475028084f920c9d399
Signed-off-by: Bryan Sullivan <bryan.sullivan@att.com>
18 files changed:
docs/images/models-k8s.png [new file with mode: 0644]
tools/README.md [new file with mode: 0644]
tools/docker/demo_deploy.sh [new file with mode: 0644]
tools/docker/docker-cluster.sh [new file with mode: 0644]
tools/docker/nginx.json [new file with mode: 0644]
tools/kubernetes/README.md [new file with mode: 0644]
tools/kubernetes/demo_deploy.sh [new file with mode: 0644]
tools/kubernetes/k8s-cluster.sh [new file with mode: 0644]
tools/maas/deploy.sh [new file with mode: 0644]
tools/prometheus/README.md [new file with mode: 0644]
tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json [new file with mode: 0644]
tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json [new file with mode: 0644]
tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json [new file with mode: 0644]
tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json [new file with mode: 0644]
tools/prometheus/prometheus-tools.sh [new file with mode: 0644]
tools/rancher/demo_deploy.sh [new file with mode: 0644]
tools/rancher/rancher-cluster.sh [new file with mode: 0644]
tools/traffic.sh [new file with mode: 0644]

diff --git a/docs/images/models-k8s.png b/docs/images/models-k8s.png
new file mode 100644 (file)
index 0000000..c54bcdb
Binary files /dev/null and b/docs/images/models-k8s.png differ
diff --git a/tools/README.md b/tools/README.md
new file mode 100644 (file)
index 0000000..16c5b79
--- /dev/null
@@ -0,0 +1,16 @@
+This repo contains experimental scripts etc for setting up cloud-native stacks for application deployment and management on bare-metal servers. A lot of cloud-native focus so far has been on public cloud providers (AWS, GCE, Azure) but there aren't many tools and even fewer full-stack open source platforms for setting up bare metal servers with the same types of cloud-native stack features. This repo is thus a collection of tools in development toward that goal, useful in experimentation, demonstration, and further investigation into characteristics of cloud-native platforms in bare-metal environments, e.g. efficiency, performance, security, and resilience.
+
+The toolset will eventually include these elements of one or more full-stack platform solutions:
+* hardware prerequisite/options guidance
+* container-focused application runtime environment, e.g.
+  * kubernetes
+  * docker-ce
+  * rancher
+* software-defined storage backends, e.g.
+  * ceph
+* runtime-native networking ("out of the box" networking features, vs some special add-on networking software)
+* app orchestration, e.g. via
+  * cloudify
+  * ONAP
+  * Helm
+* applications useful for platform characterization
\ No newline at end of file
diff --git a/tools/docker/demo_deploy.sh b/tools/docker/demo_deploy.sh
new file mode 100644 (file)
index 0000000..cbfe949
--- /dev/null
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Complete scripted deployment of an experimental Docker-based
+#. cloud-native application platform. When complete, Docker-CE and the following
+#. will be installed:
+#. - nginx as demo application
+#. - prometheus + grafana for cluster monitoring/stats
+#.   Prometheus dashboard: http://<master_public_ip>:9090
+#.   Grafana dashboard: http://<master_public_ip>:3000
+#. 
+#. Prerequisites:
+#. - Ubuntu server for cluster nodes (admin/master and worker nodes)
+#. - MAAS server as cluster admin for Docker master/worker nodes
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ bash ~/models/tools/docker/demo_deploy.sh <key> "<hosts>" <master_ip>
+#.     "<worker_ips>" [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <master_ip>: IP of master node
+#. <worker_ips>: space separated list of worker node IPs
+#. <extras>: optional name of script for extra setup functions as needed
+
+# Positional arguments, in the order given in the usage text above.
+key=$1
+nodes="$2"
+master=$3
+workers="$4"
+extras=$5
+
+# deploy.sh is sourced (not executed) with the key, the host list and the
+# optional extras script name as its positional parameters.
+source ~/models/tools/maas/deploy.sh $1 "$2" $5
+# Start an ssh-agent for this shell and load the cluster key into it.
+eval `ssh-agent`
+ssh-add $key
+echo "Setting up Docker..."
+bash ~/models/tools/docker/docker-cluster.sh all $master "$workers"
+# TODO: Figure this out... Have to break the setup into two steps as something
+# causes the ssh session to end before the prometheus setup, if both scripts 
+# (docker-cluster and prometheus-tools) are in the same ssh session
+echo "Setting up Prometheus..."
+# Copy the private key into the ubuntu user's home directory on the master.
+scp -o StrictHostKeyChecking=no $key ubuntu@$master:/home/ubuntu/$key
+# The heredoc delimiter is unquoted, so $key, $master and $workers are
+# expanded locally before the commands are sent to the master.
+ssh -x -o StrictHostKeyChecking=no ubuntu@$master <<EOF
+git clone https://gerrit.opnfv.org/gerrit/models
+exec ssh-agent bash
+ssh-add $key
+bash models/tools/prometheus/prometheus-tools.sh all "$master $workers"
+EOF
+echo "All done!"
diff --git a/tools/docker/docker-cluster.sh b/tools/docker/docker-cluster.sh
new file mode 100644 (file)
index 0000000..8c0aa69
--- /dev/null
@@ -0,0 +1,221 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Deployment script for a multi-node docker-ce cluster. 
+#. Prerequisites: 
+#. - Ubuntu server for master and worker nodes
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models  ~/models
+#. $ cd ~/models/tools/docker
+#.
+#. Usage:
+#. $ bash docker-cluster.sh all <master> "<workers>"
+#.   Automate setup and start demo services.
+#.   <master>: master node IP
+#.   <workers>: space-separated list of worker node IPs
+#. $ bash docker-cluster.sh setup <master> "<workers>"
+#.   Installs and starts master and worker nodes.
+#. $ bash docker-cluster.sh create <service>
+#.   <service>: Demo service name to start. 
+#.     Currently supported: nginx
+#. $ bash docker-cluster.sh delete <service>
+#.   <service>: Service name to delete.
+#. $ bash docker-cluster.sh clean [<node>]
+#.   <node>: optional IP address of node to clean. 
+#.   By default, cleans the entire cluster.
+#.
+
+# Set up the master and worker hosts as a docker swarm and start the nginx
+# demo service.
+# Arguments:
+#   $1 - master node IP
+#   $2 - space-separated list of worker node IPs
+# Writes /tmp/env.sh (master/workers, re-read by the other functions) and
+# /tmp/prereqs.sh (the docker-ce install script copied to every node).
+# Exits with status 1 if the docker API on the master does not answer.
+function setup() {
+  # Per https://docs.docker.com/engine/swarm/swarm-tutorial/
+  # Unquoted heredoc: $1 and $2 are expanded now, so env.sh holds literal values.
+  cat >/tmp/env.sh <<EOF
+master=$1
+workers="$2"
+EOF
+  source /tmp/env.sh
+  # Quoted heredoc: the $(...) substitutions are written literally and are
+  # evaluated later, on the node that runs prereqs.sh.
+  cat >/tmp/prereqs.sh <<'EOF'
+#!/bin/bash
+# Per https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/
+sudo apt-get remove -y docker docker-engine docker.io docker-ce
+sudo apt-get update
+sudo apt-get install -y \
+  linux-image-extra-$(uname -r) \
+  linux-image-extra-virtual
+sudo apt-get install -y \
+  apt-transport-https \
+  ca-certificates \
+  curl \
+  software-properties-common
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+sudo add-apt-repository \
+  "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+  $(lsb_release -cs) \
+  stable"
+sudo apt-get update
+sudo apt-get install -y docker-ce
+EOF
+
+  # jq is used for parsing API responses
+  sudo apt-get install -y jq
+  scp -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$master:/home/ubuntu/prereqs.sh
+  ssh -x -o StrictHostKeyChecking=no ubuntu@$master bash /home/ubuntu/prereqs.sh
+  # activate docker API
+  # Per https://www.ivankrizsan.se/2016/05/18/enabling-docker-remote-api-on-ubuntu-16-04/
+  # Unquoted heredoc: $master is expanded locally before the commands are sent.
+  ssh -x -o StrictHostKeyChecking=no ubuntu@$master <<EOF
+sudo sed -i -- 's~fd://~fd:// -H tcp://0.0.0.0:4243~' /lib/systemd/system/docker.service
+sudo systemctl daemon-reload
+sudo service docker restart
+# Activate swarm mode
+# Per https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/
+sudo docker swarm init --advertise-addr $master
+EOF
+
+  # Verify that the API port opened by the sed edit above is reachable.
+  if ! curl http://$master:4243/version ; then
+    echo "${FUNCNAME[0]}: docker API failed to initialize"
+    exit 1
+  fi
+
+  # Per https://docs.docker.com/engine/swarm/swarm-tutorial/add-nodes/
+  # Keep only the output line(s) containing "docker"; the result is run
+  # verbatim under sudo on each worker below.
+  token=$(ssh -o StrictHostKeyChecking=no -x ubuntu@$master sudo docker swarm join-token worker | grep docker)
+  for worker in $workers; do
+    echo "${FUNCNAME[0]}: setting up worker at $worker"
+    scp -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$worker:/home/ubuntu/.
+    ssh -x -o StrictHostKeyChecking=no ubuntu@$worker bash /home/ubuntu/prereqs.sh
+    ssh -x -o StrictHostKeyChecking=no ubuntu@$worker sudo $token
+  done
+
+  echo "${FUNCNAME[0]}: testing service creation"
+  # reps = 1 (the master) + number of workers
+  reps=1; for a in $workers; do ((reps++)); done
+  create_service nginx $reps
+}
+
+
+function create_service() {
+  echo "${FUNCNAME[0]}: creating service $1 with $2 replicas"
+  # sudo docker service create -p 80:80 --replicas $reps --name nginx nginx
+  # per https://docs.docker.com/engine/api/v1.27/
+  source /tmp/env.sh
+  case "$1" in
+    nginx)
+      match="Welcome to nginx!"
+      ;;
+    *)
+      echo "${FUNCNAME[0]}: service $1 not setup for use with this script"
+  esac    
+
+  if ! curl -X POST http://$master:4243/services/create -d @$1.json ; then
+    echo "${FUNCNAME[0]}: service creation failed"
+    exit 1
+  fi
+
+  check_service $1 $match
+}
+
+function check_service() {
+  echo "${FUNCNAME[0]}: checking service state for $1 with match string $2"
+  source /tmp/env.sh
+  service=$1
+  match="$2"
+  services=$(curl http://$master:4243/services)
+  n=$(echo $services | jq '. | length')
+  ((n--))
+  while [[ $n -ge 0 ]]; do
+    if [[ $(echo $services | jq -r ".[$n].Spec.Name") == $service ]]; then
+      id=$(echo $services | jq -r ".[$n].ID")
+      port=$(echo $services | jq -r ".[$n].Endpoint.Ports[0].PublishedPort")
+      nodes="$master $workers"
+      for node in $nodes; do
+        not=""
+        while ! curl -s -o /tmp/resp http://$node:$port ; do
+          echo "${FUNCNAME[0]}: service is not yet active, waiting 10 seconds"
+          sleep 10
+        done
+        curl -s -o /tmp/resp http://$node:$port 
+        if [[ $(grep -c "$match" /tmp/resp) == 0 ]]; then 
+          not="NOT"
+        fi
+        echo "$service service is $not active at address http://$node:$port"
+      done
+      break
+    fi
+    ((n--))
+  done
+}
+
+function delete_service() {
+  echo "${FUNCNAME[0]}: deleting service $1"
+  source /tmp/env.sh
+  service=$1
+  services=$(curl http://$master:4243/services)
+  n=$(echo $services | jq '. | length')
+  ((n--))
+  while [[ $n -ge 0 ]]; do
+    if [[ $(echo $services | jq -r ".[$n].Spec.Name") == $service ]]; then
+      id=$(echo $services | jq -r ".[$n].ID")
+      if ! curl -X DELETE http://$master:4243/services/$id ; then
+        echo "${FUNCNAME[0]}: failed to delete service $1"
+      else
+        echo "${FUNCNAME[0]}: deleted service $1"
+      fi
+      break
+    fi
+    ((n--))
+  done
+}
+
+# Clean the installation
+function clean() {
+  source /tmp/env.sh
+  nodes="$master $workers"
+  for node in $nodes; do
+    ssh -o StrictHostKeyChecking=no -x ubuntu@$node <<EOF
+sudo docker swarm leave --force
+sudo systemctl stop docker
+sudo apt-get remove -y docker-ce
+EOF
+  done
+}
+
+export WORK_DIR=$(pwd)
+case "$1" in
+  setup)
+    setup $2 "$3"
+    ;;
+  ceph)
+    # TODO Ceph support for docker, e.g. re
+    # http://docker.com/docs/docker/latest/en/docker-services/storage-service/
+    # https://github.com/docker/docker/issues/8722
+    # setup_ceph "$2" $3 $4 $5
+    ;;
+  all)
+    start=`date +%s`
+    setup $2 "$3"
+    end=`date +%s`
+    runtime=$((end-start))
+    runtime=$((runtime/60))
+    echo "${FUNCNAME[0]}: Demo duration = $runtime minutes"
+    ;;
+  create)
+    create_service "$2" $3
+    ;;
+  delete)
+    delete_service "$2"
+    ;;
+  clean)
+    clean $2
+    ;;
+  *)
+    if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' $0; fi
+esac
diff --git a/tools/docker/nginx.json b/tools/docker/nginx.json
new file mode 100644 (file)
index 0000000..a74681f
--- /dev/null
@@ -0,0 +1,67 @@
+{
+  "Name": "nginx",
+  "TaskTemplate": {
+    "ContainerSpec": {
+      "Image": "nginx",
+      "Mounts": [
+        {
+          "ReadOnly": true,
+          "Source": "web-data",
+          "Target": "/usr/share/nginx/html",
+          "Type": "volume",
+          "VolumeOptions": {
+            "DriverConfig": { },
+            "Labels": { "com.example.something": "something-value" }
+            }
+          }
+        ],
+      "DNSConfig": {
+        "Nameservers": [ "8.8.8.8" ],
+        "Search": [ "example.org" ],
+        "Options": [ "timeout:3" ]
+        }
+      },
+    "LogDriver": {
+      "Name": "json-file",
+      "Options": {
+        "max-file": "3",
+        "max-size": "10M"
+        }
+      },
+    "Placement": { },
+    "Resources": {
+      "Limits": {
+        "MemoryBytes": 104857600
+        },
+      "Reservations": { }
+      },
+    "RestartPolicy": {
+      "Condition": "on-failure",
+      "Delay": 10000000000,
+      "MaxAttempts": 10
+      }
+    },
+  "Mode": {
+    "Replicated": {
+      "Replicas": 3
+      }
+    },
+  "UpdateConfig": {
+    "Delay": 30000000000,
+    "Parallelism": 2,
+    "FailureAction": "pause"
+    },
+  "EndpointSpec": {
+    "Ports": [
+      {
+        "Protocol": "tcp",
+        "PublishedPort": 8080,
+        "TargetPort": 80
+        }
+      ]
+    },
+  "Labels": {
+    "foo": "bar"
+    }
+}
+
diff --git a/tools/kubernetes/README.md b/tools/kubernetes/README.md
new file mode 100644 (file)
index 0000000..b8c81f2
--- /dev/null
@@ -0,0 +1,17 @@
+This folder contains scripts etc to setup a kubernetes cluster with the following type of environment and components:
+* hardware
+  * 2 or more bare metal servers
+  * two connected networks (public and private): may work if just a single network
+  * one or more disks on each server: ceph-osd can be setup on an unused disk, or a folder (/ceph) on the host OS disk
+* kubernetes
+  * single master (admin) node
+  * other cluster nodes
+* ceph: ceph-mon on admin, ceph-osd on other nodes
+* helm on admin node
+* demo helm charts, cloned from https://github.com/kubernetes/charts and modified/tested to work on this cluster
+
+See comments in [setup script](k8s-cluster.sh) for more info.
+
+This is a work in progress!
+
+![Resulting Cluster](/docs/images/models-k8s.png?raw=true "Resulting Cluster")
diff --git a/tools/kubernetes/demo_deploy.sh b/tools/kubernetes/demo_deploy.sh
new file mode 100644 (file)
index 0000000..b3d165b
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Complete scripted deployment of an experimental kubernetes-based
+#. cloud-native application platform. When complete, kubernetes and the following
+#. will be installed:
+#. - helm and dokuwiki as a demo helm chart based application
+#. - prometheus + grafana for cluster monitoring/stats
+#. - cloudify + kubernetes plugin and a demo hello world (nginx) app installed
+#.  will be setup with:
+#. Prometheus dashboard: http://<admin_public_ip>:9090
+#. Grafana dashboard: http://<admin_public_ip>:3000
+#. 
+#. Prerequisites:
+#. - Ubuntu server for kubernetes cluster nodes (admin/master and agent nodes)
+#. - MAAS server as cluster admin for kubernetes master/agent nodes
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ bash ~/models/tools/kubernetes/demo_deploy.sh <key> "<hosts>" <admin ip> 
+#.     "<agent ips>" <pub-net> <priv-net> [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <admin ip>: IP of cluster admin node
+#. <agent ips>: space separated list of agent node IPs
+#. <pub-net>: CIDR formatted public network
+#. <priv-net>: CIDR formatted private network (may be same as pub-net)
+#. <extras>: optional name of script for extra setup functions as needed
+
+key=$1
+nodes="$2"
+admin_ip=$3
+agent_ips="$4"
+extras=$5
+
+source ~/models/tools/maas/deploy.sh $1 "$2" $5
+eval `ssh-agent`
+ssh-add $key
+if [[ "x$extras" != "x" ]]; then source $extras; fi
+scp -o StrictHostKeyChecking=no $key ubuntu@$admin_ip:/home/ubuntu/$key
+echo "Setting up kubernetes..."
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+git clone https://gerrit.opnfv.org/gerrit/models
+bash models/tools/kubernetes/k8s-cluster.sh all "$agent_ips" $priv_net $pub_net
+EOF
+# TODO: Figure this out... Have to break the setup into two steps as something
+# causes the ssh session to end before the prometheus setup, if both scripts 
+# (k8s-cluster and prometheus-tools) are in the same ssh session
+echo "Setting up prometheus..."
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+bash models/tools/prometheus/prometheus-tools.sh all "$agent_ips"
+EOF
+echo "Setting up cloudify..."
+scp models/tools/cloudify/k8s-cloudify.sh ubuntu@$admin_ip:/home/ubuntu/. 
+ssh -x ubuntu@$admin_ip bash k8s-cloudify.sh prereqs
+ssh -x ubuntu@$admin_ip bash k8s-cloudify.sh setup
+ssh -x ubuntu@$admin_ip bash k8s-cloudify.sh demo
+echo "All done!"
diff --git a/tools/kubernetes/k8s-cluster.sh b/tools/kubernetes/k8s-cluster.sh
new file mode 100644 (file)
index 0000000..6a91cdb
--- /dev/null
@@ -0,0 +1,438 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: script to setup a kubernetes cluster with calico as CNI
+#. Prerequisites: 
+#. - Ubuntu xenial server for master and agent nodes
+#. - key-based auth setup for ssh/scp between master and agent nodes
+#. - 192.168.0.0/16 should not be used on your server network interface subnets
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/kubernetes
+#. $ bash k8s-cluster.sh master
+#. $ bash k8s-cluster.sh agents "<nodes>"
+#.     nodes: space-separated list of agent node IPs
+#. $ bash k8s-cluster.sh ceph "<nodes>" <cluster-net> <public-net> [ceph_dev]
+#.     nodes: space-separated list of ceph node IPs
+#.     cluster-net: CIDR of ceph cluster network e.g. 10.0.0.1/24
+#.     public-net: CIDR of public network
+#.     ceph_dev: disk to use for ceph. ***MUST NOT BE USED FOR ANY OTHER PURPOSE***
+#.               if not provided, ceph data will be stored on osd nodes in /ceph
+#. $ bash k8s-cluster.sh helm
+#.     Setup helm as app kubernetes orchestration tool
+#. $ bash k8s-cluster.sh demo
+#.     Install helm charts for mediawiki and dokuwiki
+#. $ bash k8s-cluster.sh all "<nodes>" <cluster-net> <public-net> [ceph_dev]
+#.     Runs all the steps above
+#.
+#. Status: work in progress, incomplete
+#
+
+# Write the node prerequisite install script to /tmp/prereqs.sh.
+# The script itself is not run here; callers run it locally or copy it to a
+# node. Its $1 is expected to be "agent" on agent nodes, which makes it append
+# the node's IP and hostname to /etc/hosts.
+# The outer heredoc delimiter is quoted ('EOG'), so everything up to EOG,
+# including the inner EOF heredoc and all $ expansions, is written literally.
+function setup_prereqs() {
+  echo "${FUNCNAME[0]}: Create prerequisite setup script"
+  cat <<'EOG' >/tmp/prereqs.sh
+#!/bin/bash
+# Basic server pre-reqs
+sudo apt-get -y remove kubectl kubelet kubeadm
+sudo apt-get update
+sudo apt-get upgrade -y
+# Set hostname on agent nodes
+if [[ "$1" == "agent" ]]; then
+  echo $(ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME | sudo tee -a /etc/hosts
+fi
+# Install docker 1.12 (default for xenial is 1.12.6)
+sudo apt-get install -y docker.io
+sudo service docker start
+export KUBE_VERSION=1.7.5
+# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
+# Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/
+sudo apt-get update && sudo apt-get install -y apt-transport-https
+curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
+cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
+deb http://apt.kubernetes.io/ kubernetes-xenial main
+EOF
+sudo apt-get update
+# Next command is to workaround bug resulting in "PersistentVolumeClaim is not bound" for pod startup (remain in Pending)
+# TODO: reverify if this is still an issue in the final working script
+sudo apt-get -y install ceph-common
+sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
+EOG
+}
+
+function setup_k8s_master() {
+  echo "${FUNCNAME[0]}: Setting up kubernetes master"
+  setup_prereqs
+
+  # Install master 
+  bash /tmp/prereqs.sh master
+  # per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
+  # If the following command fails, run "kubeadm reset" before trying again
+  # --pod-network-cidr=192.168.0.0/16 is required for calico; this should not conflict with your server network interface subnets
+  sudo kubeadm init --pod-network-cidr=192.168.0.0/16 >>/tmp/kubeadm.out
+  cat /tmp/kubeadm.out
+  export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
+  echo "${FUNCNAME[0]}: Cluster join command for manual use if needed: $k8s_joincmd"
+
+  # Start cluster
+  echo "${FUNCNAME[0]}: Start the cluster"
+  mkdir -p $HOME/.kube
+  sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config
+  sudo chown $(id -u):$(id -g) $HOME/.kube/config
+  # Deploy pod network
+  echo "${FUNCNAME[0]}: Deploy calico as CNI"
+  sudo kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
+}
+
+# Install kubernetes on the agent nodes and join them to the cluster.
+# Arguments:
+#   $1 - space-separated list of agent node IPs
+# Reads the join command from /tmp/kubeadm.out (written by setup_k8s_master)
+# and waits for kube-dns to be 'Running' before touching any agent.
+function setup_k8s_agents() {
+  agents="$1"
+  export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
+  echo "${FUNCNAME[0]}: Installing agents at $1 with joincmd: $k8s_joincmd"
+
+  # (Re)generate /tmp/prereqs.sh for copying to each agent below.
+  setup_prereqs
+
+  # Fourth column of the kube-dns row of 'kubectl get pods'; compared against
+  # 'Running' below. Polled every 60 seconds, with no timeout.
+  kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
+  while [[ "$kubedns" != "Running" ]]; do
+    echo "${FUNCNAME[0]}: kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'" 
+    sleep 60
+    kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
+  done
+  echo "${FUNCNAME[0]}: kube-dns status is $kubedns" 
+
+  for agent in $agents; do
+    echo "${FUNCNAME[0]}: Install agent at $agent"
+    # UserKnownHostsFile=/dev/null: the agent's host key is not recorded.
+    scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$agent:/tmp/prereqs.sh
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent bash /tmp/prereqs.sh agent
+    # Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty" per https://github.com/kubernetes/kubeadm/issues/1
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo kubeadm reset
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo $k8s_joincmd
+  done
+
+  echo "${FUNCNAME[0]}: Cluster is ready when all nodes in the output of 'kubectl get nodes' show as 'Ready'."
+}
+
+# Deploy ceph (ceph-mon on this host, ceph-osd on the given nodes) and wire
+# it into kubernetes as rbd storageClass 'slow'.
+# Arguments:
+#   $1 - space-separated list of ceph-osd node IPs
+#   $2 - CIDR of the ceph cluster network
+#   $3 - CIDR of the public network
+#   $4 - optional disk to use for the OSDs; when empty, OSD data is stored in
+#        the folder /ceph on each node
+# Exits with status 1 if the kube-controller-manager manifest cannot be patched.
+# Several wait loops below poll without a timeout.
+function setup_ceph() {
+  node_ips=$1
+  cluster_net=$2
+  public_net=$3
+  ceph_dev=$4
+  echo "${FUNCNAME[0]}: Deploying ceph-mon on localhost $HOSTNAME"
+  echo "${FUNCNAME[0]}: Deploying ceph-osd on nodes $node_ips"
+  echo "${FUNCNAME[0]}: Setting cluster-network=$cluster_net and public-network=$public_net"
+  # Last field of the first line of 'ip route get 8.8.8.8', used as this
+  # host's IP address.
+  mon_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
+  all_nodes="$mon_ip $node_ips"
+  # Also caches the server fingerprints so ceph-deploy does not prompt the user
+  # Note this loop may be partially redundant with the ceph-deploy steps below
+  for node_ip in $all_nodes; do
+    echo "${FUNCNAME[0]}: Install ntp and ceph on $node_ip"
+    # NOTE: the heredoc delimiter is unquoted, so $(lsb_release -sc) is
+    # evaluated on this host before the commands are sent, not on $node_ip.
+    ssh -x -o StrictHostKeyChecking=no ubuntu@$node_ip <<EOF
+sudo timedatectl set-ntp no
+wget -q -O- 'https://download.ceph.com/keys/release.asc' | sudo apt-key add -
+echo deb https://download.ceph.com/debian/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list
+sudo apt update
+sudo apt-get install -y ntp ceph ceph-deploy
+EOF
+  done
+  
+  # per http://docs.ceph.com/docs/master/start/quick-ceph-deploy/
+  # also https://upcommons.upc.edu/bitstream/handle/2117/101816/Degree_Thesis_Nabil_El_Alami.pdf#vote +1
+  echo "${FUNCNAME[0]}: Create ceph config folder ~/ceph-cluster"
+  # mkdir is used without -p, so it reports an error if the folder already exists.
+  mkdir ~/ceph-cluster
+  cd ~/ceph-cluster
+  
+  echo "${FUNCNAME[0]}: Create new cluster with $HOSTNAME as initial ceph-mon node"
+  ceph-deploy new --cluster-network $cluster_net --public-network $public_net --no-ssh-copykey $HOSTNAME
+  # Update conf per recommendations of http://docs.ceph.com/docs/jewel/rados/configuration/filesystem-recommendations/
+  cat <<EOF >>ceph.conf
+osd max object name len = 256
+osd max object namespace len = 64
+EOF
+  cat ceph.conf
+
+  echo "${FUNCNAME[0]}: Deploy ceph packages on other nodes"
+  ceph-deploy install $mon_ip $node_ips
+
+  echo "${FUNCNAME[0]}: Deploy the initial monitor and gather the keys"
+  ceph-deploy mon create-initial
+
+  # No device given: use the folder /ceph on each node as the OSD store.
+  if [[ "x$ceph_dev" == "x" ]]; then
+    n=1
+    for node_ip in $node_ips; do
+      echo "${FUNCNAME[0]}: Prepare ceph OSD on node $node_ip"
+      echo "$node_ip ceph-osd$n" | sudo tee -a /etc/hosts
+      # Using ceph-osd$n here avoids need for manual acceptance of the new server hash
+      # Unquoted heredoc: $node_ip and $n are expanded locally.
+      ssh -x -o StrictHostKeyChecking=no ubuntu@ceph-osd$n <<EOF
+echo "$node_ip ceph-osd$n" | sudo tee -a /etc/hosts
+sudo mkdir /ceph && sudo chown -R ceph:ceph /ceph
+EOF
+      ceph-deploy osd prepare ceph-osd$n:/ceph
+      ceph-deploy osd activate ceph-osd$n:/ceph
+      ((n++))
+    done
+  else 
+    echo "${FUNCNAME[0]}: Deploy OSDs"
+    for node_ip in $node_ips; do
+      echo "${FUNCNAME[0]}: Create ceph osd on $node_ip using $ceph_dev"
+      ceph-deploy osd create $node_ip:$ceph_dev
+    done
+  fi
+
+  echo "${FUNCNAME[0]}: Copy the config file and admin key to the admin node and OSD nodes"
+  ceph-deploy admin $mon_ip $node_ips
+
+  echo "${FUNCNAME[0]}: Check the cluster health"
+  sudo ceph health
+  sudo ceph -s
+
+  # per https://crondev.com/kubernetes-persistent-storage-ceph/ and https://github.com/kubernetes/kubernetes/issues/38923
+  # rbd  is not included in default kube-controller-manager... use attcomdev version
+  sudo sed -i -- 's~gcr.io/google_containers/kube-controller-manager-amd64:.*~quay.io/attcomdev/kube-controller-manager:v1.7.3~' /etc/kubernetes/manifests/kube-controller-manager.yaml
+  # Confirm that the substitution above actually matched something.
+  if [[ $(sudo grep -c attcomdev/kube-controller-manager /etc/kubernetes/manifests/kube-controller-manager.yaml) == 0 ]]; then
+    echo "${FUNCNAME[0]}: Problem patching /etc/kubernetes/manifests/kube-controller-manager.yaml... script update needed"
+    exit 1
+  fi
+  # Wait for the kube-controller-manager pod to report 'Running' (fourth
+  # column of 'kubectl get pods').
+  mgr=$(kubectl get pods --all-namespaces | grep kube-controller-manager | awk '{print $4}')
+  while [[ "$mgr" != "Running" ]]; do
+    echo "${FUNCNAME[0]}: kube-controller-manager status is $mgr. Waiting 60 seconds for it to be 'Running'" 
+    sleep 60
+    mgr=$(kubectl get pods --all-namespaces | grep kube-controller-manager | awk '{print $4}')
+  done
+  echo "${FUNCNAME[0]}: kube-controller-manager status is $mgr"
+
+  echo "${FUNCNAME[0]}: Create Ceph admin secret"
+  admin_key=$(sudo ceph auth get-key client.admin)
+  kubectl create secret generic ceph-secret-admin --from-literal=key="$admin_key" --namespace=kube-system --type=kubernetes.io/rbd
+
+  echo "${FUNCNAME[0]}: Create rdb storageClass 'slow'"
+  # Unquoted heredoc: $mon_ip is expanded into the generated YAML.
+  cat <<EOF >/tmp/ceph-sc.yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+   name: slow
+provisioner: kubernetes.io/rbd
+parameters:
+    monitors: $mon_ip:6789
+    adminId: admin
+    adminSecretName: ceph-secret-admin
+    adminSecretNamespace: "kube-system"
+    pool: kube
+    userId: kube
+    userSecretName: ceph-secret-user
+EOF
+  # TODO: find out where in the above ~/.kube folders became owned by root
+  sudo chown -R ubuntu:ubuntu ~/.kube/*
+  kubectl create -f /tmp/ceph-sc.yaml
+
+  echo "${FUNCNAME[0]}: Create storage pool 'kube'"
+  # https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md method
+  sudo ceph osd pool create kube 32 32
+
+  echo "${FUNCNAME[0]}: Authorize client 'kube' access to pool 'kube'"
+  sudo ceph auth get-or-create client.kube mon 'allow r' osd 'allow rwx pool=kube'
+
+  echo "${FUNCNAME[0]}: Create ceph-secret-user secret in namespace 'default'"
+  kube_key=$(sudo ceph auth get-key client.kube)
+  kubectl create secret generic ceph-secret-user --from-literal=key="$kube_key" --namespace=default --type=kubernetes.io/rbd
+  # A similar secret must be created in other namespaces that intend to access the ceph pool
+
+  # Per https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md
+
+  echo "${FUNCNAME[0]}: Create andtest a persistentVolumeClaim"
+  # Smoke test: create a claim against storageClass 'slow', wait until it is
+  # bound, then delete it again.
+  cat <<EOF >/tmp/ceph-pvc.yaml
+{
+  "kind": "PersistentVolumeClaim",
+  "apiVersion": "v1",
+  "metadata": {
+    "name": "claim1",
+    "annotations": {
+        "volume.beta.kubernetes.io/storage-class": "slow"
+    }
+  },
+  "spec": {
+    "accessModes": [
+      "ReadWriteOnce"
+    ],
+    "resources": {
+      "requests": {
+        "storage": "3Gi"
+      }
+    }
+  }
+}
+EOF
+  kubectl create -f /tmp/ceph-pvc.yaml
+  while [[ "x$(kubectl get pvc -o jsonpath='{.status.phase}' claim1)" != "xBound" ]]; do
+    echo "${FUNCNAME[0]}: Waiting for pvc claim1 to be 'Bound'"
+    kubectl describe pvc 
+    sleep 10
+  done
+  echo "${FUNCNAME[0]}: pvc claim1 successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' claim1)"
+  kubectl get pvc
+  kubectl delete pvc claim1
+  kubectl describe pods
+}
+
+# Block until a service in the 'default' namespace is ready and answering.
+# Arguments:
+#   $1 - service name; also used as an awk pattern to find the pod by name
+# First waits for the pod's first container to report ready, then polls the
+# pod's host IP on the service's first nodePort with curl. Both loops retry
+# every 10 seconds, with no timeout.
+function wait_for_service() {
+  echo "${FUNCNAME[0]}: Waiting for service $1 to be available"
+  # NOTE(review): the awk pattern matches every pod whose listing line
+  # contains $1, so $pod may hold several names - confirm this is acceptable.
+  pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }")
+  echo "${FUNCNAME[0]}: Service $1 is at pod $pod"
+  ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod)
+  while [[ "$ready" != "true" ]]; do
+    echo "${FUNCNAME[0]}: $1 container is not yet ready... waiting 10 seconds"
+    sleep 10
+    # TODO: figure out why transient pods sometimes mess up this logic, thus need to re-get the pods
+    pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }")
+    ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod)
+  done
+  echo "${FUNCNAME[0]}: pod $pod container status is $ready"
+  host_ip=$(kubectl get pods --namespace default -o jsonpath='{.status.hostIP}' $pod)
+  port=$(kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services $1)
+  echo "${FUNCNAME[0]}: pod $pod container is at host $host_ip and port $port"
+  while ! curl http://$host_ip:$port ; do
+    echo "${FUNCNAME[0]}: $1 service is not yet responding... waiting 10 seconds"
+    sleep 10
+  done
+  echo "${FUNCNAME[0]}: $1 is available at http://$host_ip:$port"
+}
+
+function demo_chart() {
+  cd ~
+  rm -rf charts
+  git clone https://github.com/kubernetes/charts.git
+  cd charts/stable
+  case "$1" in
+    mediawiki)
+      # NOT YET WORKING
+      # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed
+      mkdir ./mediawiki/charts
+      cp -r ./mariadb ./mediawiki/charts
+      # LoadBalancer is N/A for baremetal (public cloud only) - use NodePort
+      sed -i -- 's/LoadBalancer/NodePort/g' ./mediawiki/values.yaml
+      # Select the storageClass created in the ceph setup step
+      sed -i -- 's/# storageClass:/storageClass: "slow"/g' ./mediawiki/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./mediawiki/charts/mariadb/values.yaml
+      helm install --name mw -f ./mediawiki/values.yaml ./mediawiki
+      wait_for_service mw-mediawiki
+      ;;
+    dokuwiki)
+      sed -i -- 's/# storageClass:/storageClass: "slow"/g' ./dokuwiki/values.yaml
+      sed -i -- 's/LoadBalancer/NodePort/g' ./dokuwiki/values.yaml
+      helm install --name dw -f ./dokuwiki/values.yaml ./dokuwiki
+      wait_for_service dw-dokuwiki
+      ;;
+    wordpress)
+      # NOT YET WORKING
+      # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed
+      mkdir ./wordpress/charts
+      cp -r ./mariadb ./wordpress/charts
+      sed -i -- 's/LoadBalancer/NodePort/g' ./wordpress/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./wordpress/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./wordpress/charts/mariadb/values.yaml
+      helm install --name wp -f ./wordpress/values.yaml ./wordpress
+      wait_for_service wp-wordpress
+      ;;
+    redmine)
+      # NOT YET WORKING
+      # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed
+      mkdir ./redmine/charts
+      cp -r ./mariadb ./redmine/charts
+      cp -r ./postgresql ./redmine/charts
+      sed -i -- 's/LoadBalancer/NodePort/g' ./redmine/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/charts/mariadb/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/charts/postgresql/values.yaml
+      helm install --name rdm -f ./redmine/values.yaml ./redmine
+      wait_for_service rdm-redmine
+      ;;
+    owncloud)
+      # NOT YET WORKING: needs resolvable hostname for service
+      mkdir ./owncloud/charts
+      cp -r ./mariadb ./owncloud/charts
+      sed -i -- 's/LoadBalancer/NodePort/g' ./owncloud/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./owncloud/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./owncloud/charts/mariadb/values.yaml
+      helm install --name oc -f ./owncloud/values.yaml ./owncloud
+      wait_for_service oc-owncloud
+      ;;
+    *)
+      echo "${FUNCNAME[0]}: demo not implemented for $1"
+  esac    
+# extra useful commands
+# kubectl describe pvc 
+# kubectl get pvc
+# kubectl describe pods
+# kubectl get pods --namespace default
+# kubectl get pods --all-namespaces
+# kubectl get svc --namespace default dw-dokuwiki
+# kubectl describe svc --namespace default dw-dokuwiki
+# kubectl describe pods --namespace default dw-dokuwiki
+}
+
+function setup_helm() {
+  echo "${FUNCNAME[0]}: Setup helm"
+  # Install Helm
+  cd ~
+  curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get > get_helm.sh
+  chmod 700 get_helm.sh
+  ./get_helm.sh
+  helm init
+  helm repo update
+  # TODO: Workaround for bug https://github.com/kubernetes/helm/issues/2224
+  # For testing use only!
+  kubectl create clusterrolebinding permissive-binding --clusterrole=cluster-admin --user=admin --user=kubelet --group=system:serviceaccounts;
+  # TODO: workaround for tiller FailedScheduling (No nodes are available that match all of the following predicates:: PodToleratesNodeTaints (1).)
+  # kubectl taint nodes $HOSTNAME node-role.kubernetes.io/master:NoSchedule-
+  # Wait till tiller is running
+  tiller_deploy=$(kubectl get pods --all-namespaces | grep tiller-deploy | awk '{print $4}')
+  while [[ "$tiller_deploy" != "Running" ]]; do
+    echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy. Waiting 60 seconds for it to be 'Running'" 
+    sleep 60
+    tiller_deploy=$(kubectl get pods --all-namespaces | grep tiller-deploy | awk '{print $4}')
+  done
+  echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy"
+
+  # Install services via helm charts from https://kubeapps.com/charts
+  # e.g. helm install stable/dokuwiki
+}
+
+export WORK_DIR=$(pwd)
+case "$1" in
+  master)
+    setup_k8s_master
+    ;;
+  agents)
+    setup_k8s_agents "$2"
+    ;;
+  ceph)
+    setup_ceph "$2" $3 $4 $5
+    ;;
+  helm)
+    setup_helm
+    ;;
+  demo)
+    demo_chart $2
+    ;;
+  all)
+    setup_k8s_master
+    setup_k8s_agents "$2"
+    setup_ceph "$2" $3 $4 $5
+    setup_helm
+    demo_chart dokuwiki
+    ;;
+  clean)
+    # TODO
+    ;;
+  *)
+    if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' $0; fi
+esac
diff --git a/tools/maas/deploy.sh b/tools/maas/deploy.sh
new file mode 100644 (file)
index 0000000..ae89893
--- /dev/null
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Scripted deployment of servers using MAAS. Currently it deploys
+#. the default host OS as configured in MAAS.
+#. 
+#. Prerequisites:
+#. - MAAS server configured to admin a set of servers
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ source ~/models/tools/maas/deploy.sh <key> "<hosts>" [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <extras>: optional name of script for extra setup functions as needed
+
+function wait_node_status() {
+  status=$(maas opnfv machines read hostname=$1 | jq -r ".[0].status_name")
+  while [[ "x$status" != "x$2" ]]; do
+    echo "$1 status is $status ... waiting for it to be $2"
+    sleep 30
+    status=$(maas opnfv machines read hostname=$1 | jq -r ".[0].status_name")
+  done
+  echo "$1 status is $status"
+}
+
+function release_nodes() {
+  nodes=$1
+  for node in $nodes; do
+    echo "Releasing node $node"
+    id=$(maas opnfv machines read hostname=$node | jq -r '.[0].system_id')
+    maas opnfv machines release machines=$id
+  done
+}
+
+function deploy_nodes() {
+  nodes=$1
+  for node in $nodes; do
+    echo "Deploying node $node"
+    id=$(maas opnfv machines read hostname=$node | jq -r '.[0].system_id')
+    maas opnfv machines allocate system_id=$id
+    maas opnfv machine deploy $id
+  done
+}
+
+function wait_nodes_status() {
+  nodes=$1
+  for node in $nodes; do
+    wait_node_status $node $2
+  done
+}
+
+key=$1
+nodes="$2"
+extras=$3
+
+release_nodes "$nodes"
+wait_nodes_status "$nodes" Ready
+deploy_nodes "$nodes"
+wait_nodes_status "$nodes" Deployed
+eval `ssh-agent`
+ssh-add $key
+if [[ "x$extras" != "x" ]]; then source $extras; fi
diff --git a/tools/prometheus/README.md b/tools/prometheus/README.md
new file mode 100644 (file)
index 0000000..a3dfcc5
--- /dev/null
@@ -0,0 +1,10 @@
+This folder contains scripts etc to set up [prometheus](https://github.com/prometheus/prometheus) on a server cluster. It installs:
+* a prometheus server (on the host OS) and [grafana](https://grafana.com/) (in docker)
+* prometheus exporters on a set of other nodes, to be monitored
+  * [node exporter](https://github.com/prometheus/node_exporter) for node basic analytics
+  * [haproxy exporter](https://github.com/prometheus/haproxy_exporter) for load-balancer stats from haproxy e.g. as used by Rancher
+* several sample grafana dashboards... for more see [grafana dashboards for prometheus](https://grafana.com/dashboards?dataSource=prometheus)
+
+See comments in [prometheus-tools.sh](prometheus-tools.sh) for more info.
+
+This is a work in progress!
diff --git a/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json b/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json
new file mode 100644 (file)
index 0000000..afc69a2
--- /dev/null
@@ -0,0 +1,712 @@
+{
+"dashboard": {
+  "__inputs": [
+    {
+      "name": "Prometheus",
+      "label": "Prometheus",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "prometheus",
+      "pluginName": "Prometheus"
+    }
+  ],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "4.4.3"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "singlestat",
+      "name": "Singlestat",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": []
+  },
+  "description": "Docker Monitoring Template",
+  "editable": true,
+  "gnetId": 179,
+  "graphTooltip": 1,
+  "hideControls": false,
+  "id": null,
+  "links": [],
+  "refresh": "10s",
+  "rows": [
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(50, 172, 45, 0.97)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "format": "percent",
+          "gauge": {
+            "maxValue": 100,
+            "minValue": 0,
+            "show": true,
+            "thresholdLabels": false,
+            "thresholdMarkers": true
+          },
+          "id": 4,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "span": 4,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "(sum(node_memory_MemTotal) - sum(node_memory_MemFree+node_memory_Buffers+node_memory_Cached) ) / sum(node_memory_MemTotal) * 100",
+              "interval": "10s",
+              "intervalFactor": 1,
+              "refId": "A",
+              "step": 30
+            }
+          ],
+          "thresholds": "65, 90",
+          "title": "Memory usage",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "current"
+        },
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(50, 172, 45, 0.97)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "datasource": "Prometheus",
+          "decimals": 2,
+          "editable": true,
+          "error": false,
+          "format": "percent",
+          "gauge": {
+            "maxValue": 100,
+            "minValue": 0,
+            "show": true,
+            "thresholdLabels": false,
+            "thresholdMarkers": true
+          },
+          "id": 6,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "span": 4,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) )) / count(node_cpu{mode=\"system\"}) * 100",
+              "interval": "10s",
+              "intervalFactor": 1,
+              "refId": "A",
+              "step": 30
+            }
+          ],
+          "thresholds": "65, 90",
+          "title": "CPU usage",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "current"
+        },
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(50, 172, 45, 0.97)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "datasource": "Prometheus",
+          "decimals": 2,
+          "editable": true,
+          "error": false,
+          "format": "percent",
+          "gauge": {
+            "maxValue": 100,
+            "minValue": 0,
+            "show": true,
+            "thresholdLabels": false,
+            "thresholdMarkers": true
+          },
+          "id": 7,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "span": 4,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "sum (container_fs_limit_bytes - container_fs_usage_bytes) / sum(container_fs_limit_bytes)",
+              "interval": "10s",
+              "intervalFactor": 1,
+              "metric": "",
+              "refId": "A",
+              "step": 30
+            }
+          ],
+          "thresholds": "65, 90",
+          "title": "Filesystem usage",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "current"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 3,
+          "editable": true,
+          "error": false,
+          "fill": 0,
+          "grid": {},
+          "id": 3,
+          "legend": {
+            "alignAsTable": true,
+            "avg": true,
+            "current": true,
+            "max": false,
+            "min": false,
+            "rightSide": true,
+            "show": true,
+            "sort": "current",
+            "sortDesc": true,
+            "total": false,
+            "values": true
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sort_desc(sum(rate(container_cpu_user_seconds_total{image!=\"\"}[1m])) by (name))",
+              "interval": "10s",
+              "intervalFactor": 1,
+              "legendFormat": "{{ name }}",
+              "metric": "container_cpu_user_seconds_total",
+              "refId": "A",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Container CPU usage",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percentunit",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 2,
+          "editable": true,
+          "error": false,
+          "fill": 0,
+          "grid": {},
+          "id": 2,
+          "legend": {
+            "alignAsTable": true,
+            "avg": true,
+            "current": true,
+            "max": false,
+            "min": false,
+            "rightSide": true,
+            "show": true,
+            "sideWidth": 200,
+            "sort": "current",
+            "sortDesc": true,
+            "total": false,
+            "values": true
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (name))",
+              "interval": "10s",
+              "intervalFactor": 1,
+              "legendFormat": "{{ name }}",
+              "metric": "container_memory_usage:sort_desc",
+              "refId": "A",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Container Memory Usage",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 2,
+          "editable": true,
+          "error": false,
+          "fill": 0,
+          "grid": {},
+          "id": 8,
+          "legend": {
+            "alignAsTable": true,
+            "avg": true,
+            "current": true,
+            "max": false,
+            "min": false,
+            "rightSide": true,
+            "show": true,
+            "sideWidth": 200,
+            "sort": "current",
+            "sortDesc": true,
+            "total": false,
+            "values": true
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) ))",
+              "interval": "10s",
+              "intervalFactor": 1,
+              "legendFormat": "{{ name }}",
+              "metric": "container_network_receive_bytes_total",
+              "refId": "A",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Container Network Input",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 2,
+          "editable": true,
+          "error": false,
+          "fill": 0,
+          "grid": {},
+          "id": 9,
+          "legend": {
+            "alignAsTable": true,
+            "avg": true,
+            "current": true,
+            "max": false,
+            "min": false,
+            "rightSide": true,
+            "show": true,
+            "sideWidth": 200,
+            "sort": "current",
+            "sortDesc": true,
+            "total": false,
+            "values": true
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) ))",
+              "intervalFactor": 2,
+              "legendFormat": "{{ name }}",
+              "metric": "container_network_transmit_bytes_total",
+              "refId": "B",
+              "step": 4
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Container Network Output",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    }
+  ],
+  "schemaVersion": 14,
+  "style": "dark",
+  "tags": [
+    "docker"
+  ],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "browser",
+  "title": "Docker Dashboard",
+  "version": 1
+}
+}
diff --git a/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json b/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json
new file mode 100644 (file)
index 0000000..6db3532
--- /dev/null
@@ -0,0 +1,1618 @@
+{
+"dashboard": {
+  "__inputs": [
+    {
+      "name": "Prometheus",
+      "label": "Prometheus",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "prometheus",
+      "pluginName": "Prometheus"
+    }
+  ],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "4.4.3"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "table",
+      "name": "Table",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": []
+  },
+  "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)",
+  "editable": true,
+  "gnetId": 395,
+  "graphTooltip": 1,
+  "hideControls": false,
+  "id": null,
+  "links": [],
+  "refresh": "10s",
+  "rows": [
+    {
+      "collapse": false,
+      "height": 143.625,
+      "panels": [
+        {
+          "aliasColors": {
+            "SENT": "#BF1B00"
+          },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 5,
+          "grid": {},
+          "id": 19,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": false,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 1,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 2,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)",
+              "intervalFactor": 2,
+              "legendFormat": "RECEIVED",
+              "refId": "A",
+              "step": 10
+            },
+            {
+              "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "SENT",
+              "refId": "B",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Network Traffic on Node",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "transparent": false,
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": false,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {
+            "Ops-Infrastructure": "#447EBC",
+            "{}": "#DEDAF7"
+          },
+          "bars": true,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 0,
+          "editable": true,
+          "error": false,
+          "fill": 3,
+          "grid": {},
+          "id": 7,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": false,
+            "total": false,
+            "values": false
+          },
+          "lines": false,
+          "linewidth": 3,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 10,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 1.9899973849372385,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"monitoring\"}[$interval]))",
+              "intervalFactor": 2,
+              "legendFormat": "Monitoring",
+              "metric": "container_last_seen",
+              "refId": "A",
+              "step": 10
+            },
+            {
+              "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"ops-infrastructure\"}[$interval]))",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "Ops-Infrastructure",
+              "refId": "B",
+              "step": 10
+            },
+            {
+              "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"backend-infrastructure\"}[$interval]))",
+              "hide": false,
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Backend-Infrastructure",
+              "refId": "C",
+              "step": 10
+            },
+            {
+              "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"backend-workers\"}[$interval]))",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "Backend-Workers",
+              "refId": "D",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Running Containers (by Container Group)",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": false,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "none",
+              "label": "",
+              "logBase": 1,
+              "max": null,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {
+            "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9"
+          },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 3,
+          "grid": {},
+          "id": 5,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": false,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 2.0707047594142263,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(rate(container_cpu_system_seconds_total[1m]))",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "a",
+              "refId": "B",
+              "step": 120
+            },
+            {
+              "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))",
+              "hide": true,
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "containers only",
+              "refId": "F",
+              "step": 10
+            },
+            {
+              "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))",
+              "hide": true,
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "docker host only",
+              "metric": "",
+              "refId": "A",
+              "step": 20
+            },
+            {
+              "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100",
+              "hide": false,
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "host",
+              "metric": "",
+              "refId": "C",
+              "step": 10
+            },
+            {
+              "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "",
+              "refId": "D",
+              "step": 120
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "CPU Usage on Node",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": false,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percent",
+              "label": "",
+              "logBase": 1,
+              "max": 120,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {
+            "Belegete Festplatte": "#BF1B00",
+            "Free Disk Space": "#7EB26D",
+            "Used Disk Space": "#BF1B00",
+            "{}": "#BF1B00"
+          },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 4,
+          "grid": {},
+          "id": 13,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": false,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 3,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 2,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_filesystem_free{fstype=\"aufs\"}",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "Free Disk Space",
+              "refId": "A",
+              "step": 10
+            },
+            {
+              "expr": "node_filesystem_size{fstype=\"aufs\"} - node_filesystem_free{fstype=\"aufs\"}",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "Used Disk Space",
+              "refId": "B",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Free and Used Disk Space on Node",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": false,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": "",
+              "logBase": 1,
+              "max": null,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {
+            "Available Memory": "#7EB26D",
+            "Unavailable Memory": "#BF1B00"
+          },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 4,
+          "grid": {},
+          "id": 20,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": false,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 3,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 2,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "container_memory_rss{name=~\".+\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "D",
+              "step": 30
+            },
+            {
+              "expr": "sum(container_memory_rss{name=~\".+\"})",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "A",
+              "step": 20
+            },
+            {
+              "expr": "container_memory_usage_bytes{name=~\".+\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "B",
+              "step": 20
+            },
+            {
+              "expr": "container_memory_rss{id=\"/\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "C",
+              "step": 30
+            },
+            {
+              "expr": "sum(container_memory_rss)",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "E",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_Buffers",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "node_memory_Buffers",
+              "refId": "N",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_MemFree",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "F",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_MemAvailable",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "Available Memory",
+              "refId": "H",
+              "step": 10
+            },
+            {
+              "expr": "node_memory_MemTotal - node_memory_MemAvailable",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "Unavailable Memory",
+              "refId": "G",
+              "step": 10
+            },
+            {
+              "expr": "node_memory_Inactive",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "I",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_KernelStack",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "J",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_Active",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "K",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "Unknown",
+              "refId": "L",
+              "step": 40
+            },
+            {
+              "expr": "node_memory_MemFree + node_memory_Inactive ",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "M",
+              "step": 30
+            },
+            {
+              "expr": "container_memory_rss{name=~\".+\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "refId": "O",
+              "step": 30
+            },
+            {
+              "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "",
+              "refId": "P",
+              "step": 40
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Available Memory on Node",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": false,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": "",
+              "logBase": 1,
+              "max": 4200000000,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 3,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": false,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 1.939297855648535,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(rate(node_disk_bytes_read[$interval])) by (device)",
+              "intervalFactor": 2,
+              "legendFormat": "OUT on /{{device}}",
+              "metric": "node_disk_bytes_read",
+              "refId": "A",
+              "step": 10
+            },
+            {
+              "expr": "sum(rate(node_disk_bytes_written[$interval])) by (device)",
+              "intervalFactor": 2,
+              "legendFormat": "IN on /{{device}}",
+              "metric": "",
+              "refId": "B",
+              "step": 10
+            },
+            {
+              "expr": "",
+              "intervalFactor": 2,
+              "refId": "C"
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk I/O on Node",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": false,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "Bps",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 284.609375,
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 5,
+          "grid": {},
+          "id": 1,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6.0790694124949285,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[$interval])) by (name) * 100",
+              "hide": false,
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "metric": "container_cp",
+              "refId": "F",
+              "step": 2
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "CPU Usage per Container (Stacked)",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percent",
+              "label": "",
+              "logBase": 1,
+              "max": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {
+            "node_load15": "#CCA300"
+          },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 0,
+          "grid": {},
+          "id": 4,
+          "legend": {
+            "avg": false,
+            "current": true,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": true
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 5.920930587505071,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "{__name__=~\"^node_load.*\"}",
+              "intervalFactor": 2,
+              "legendFormat": "{{__name__}}",
+              "metric": "node",
+              "refId": "A",
+              "step": 4
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "System Load on Node",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 203.515625,
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 9,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "hideEmpty": false,
+            "hideZero": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])) by (name)",
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "A",
+              "step": 2
+            },
+            {
+              "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "",
+              "refId": "B",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Sent Network Traffic per Container",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "transparent": false,
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "Bps",
+              "label": "",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": "",
+              "logBase": 10,
+              "max": 8,
+              "min": 0,
+              "show": false
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 3,
+          "grid": {},
+          "id": 10,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(container_memory_rss{name=~\".+\"}) by (name)",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "A",
+              "step": 2
+            },
+            {
+              "expr": "container_memory_usage_bytes{name=~\".+\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "B",
+              "step": 240
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Memory Usage per Container (Stacked)",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": "",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 222.703125,
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 8,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[$interval])) by (name)",
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "A",
+              "step": 2
+            },
+            {
+              "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "B",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Received Network Traffic per Container",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "transparent": false,
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "Bps",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 3,
+          "grid": {},
+          "id": 11,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "null as zero",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "container_memory_rss{name=~\".+\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "A",
+              "step": 20
+            },
+            {
+              "expr": "container_memory_usage_bytes{name=~\".+\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "B",
+              "step": 20
+            },
+            {
+              "expr": "sum(container_memory_cache{name=~\".+\"}) by (name)",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "{{name}}",
+              "refId": "C",
+              "step": 2
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Cached Memory per Container (Stacked)",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": false
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "columns": [
+            {
+              "text": "Avg",
+              "value": "avg"
+            }
+          ],
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fontSize": "100%",
+          "hideTimeOverride": false,
+          "id": 18,
+          "links": [],
+          "pageSize": 100,
+          "scroll": true,
+          "showHeader": true,
+          "sort": {
+            "col": 0,
+            "desc": true
+          },
+          "span": 6,
+          "styles": [
+            {
+              "dateFormat": "YYYY-MM-DD HH:mm:ss",
+              "pattern": "Time",
+              "type": "date"
+            },
+            {
+              "colorMode": null,
+              "colors": [
+                "rgba(245, 54, 54, 0.9)",
+                "rgba(237, 129, 40, 0.89)",
+                "rgba(50, 172, 45, 0.97)"
+              ],
+              "decimals": 2,
+              "pattern": "/.*/",
+              "thresholds": [],
+              "type": "number",
+              "unit": "short"
+            }
+          ],
+          "targets": [
+            {
+              "expr": "cadvisor_version_info",
+              "intervalFactor": 2,
+              "legendFormat": "cAdvisor Version: {{cadvisorVersion}}",
+              "refId": "A",
+              "step": 2
+            },
+            {
+              "expr": "prometheus_build_info",
+              "intervalFactor": 2,
+              "legendFormat": "Prometheus Version: {{version}}",
+              "refId": "B",
+              "step": 2
+            },
+            {
+              "expr": "node_exporter_build_info",
+              "intervalFactor": 2,
+              "legendFormat": "Node-Exporter Version: {{version}}",
+              "refId": "C",
+              "step": 2
+            },
+            {
+              "expr": "cadvisor_version_info",
+              "intervalFactor": 2,
+              "legendFormat": "Docker Version: {{dockerVersion}}",
+              "refId": "D",
+              "step": 2
+            },
+            {
+              "expr": "cadvisor_version_info",
+              "intervalFactor": 2,
+              "legendFormat": "Host OS Version: {{osVersion}}",
+              "refId": "E",
+              "step": 2
+            },
+            {
+              "expr": "cadvisor_version_info",
+              "intervalFactor": 2,
+              "legendFormat": "Host Kernel Version: {{kernelVersion}}",
+              "refId": "F",
+              "step": 2
+            }
+          ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "",
+          "transform": "timeseries_aggregations",
+          "type": "table"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Check this out",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 290.98582985381427,
+      "panels": [],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 127,
+      "panels": [],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    }
+  ],
+  "schemaVersion": 14,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "allValue": ".+",
+        "current": {},
+        "datasource": "Prometheus",
+        "hide": 0,
+        "includeAll": true,
+        "label": "Container Group",
+        "multi": true,
+        "name": "containergroup",
+        "options": [],
+        "query": "label_values(container_group)",
+        "refresh": 1,
+        "regex": "",
+        "sort": 0,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "auto": true,
+        "auto_count": 50,
+        "auto_min": "50s",
+        "current": {
+          "text": "auto",
+          "value": "$__auto_interval"
+        },
+        "datasource": null,
+        "hide": 0,
+        "includeAll": false,
+        "label": "Interval",
+        "multi": false,
+        "name": "interval",
+        "options": [
+          {
+            "selected": true,
+            "text": "auto",
+            "value": "$__auto_interval"
+          },
+          {
+            "selected": false,
+            "text": "30s",
+            "value": "30s"
+          },
+          {
+            "selected": false,
+            "text": "1m",
+            "value": "1m"
+          },
+          {
+            "selected": false,
+            "text": "2m",
+            "value": "2m"
+          },
+          {
+            "selected": false,
+            "text": "3m",
+            "value": "3m"
+          },
+          {
+            "selected": false,
+            "text": "5m",
+            "value": "5m"
+          },
+          {
+            "selected": false,
+            "text": "7m",
+            "value": "7m"
+          },
+          {
+            "selected": false,
+            "text": "10m",
+            "value": "10m"
+          },
+          {
+            "selected": false,
+            "text": "30m",
+            "value": "30m"
+          },
+          {
+            "selected": false,
+            "text": "1h",
+            "value": "1h"
+          },
+          {
+            "selected": false,
+            "text": "6h",
+            "value": "6h"
+          },
+          {
+            "selected": false,
+            "text": "12h",
+            "value": "12h"
+          },
+          {
+            "selected": false,
+            "text": "1d",
+            "value": "1d"
+          },
+          {
+            "selected": false,
+            "text": "7d",
+            "value": "7d"
+          },
+          {
+            "selected": false,
+            "text": "14d",
+            "value": "14d"
+          },
+          {
+            "selected": false,
+            "text": "30d",
+            "value": "30d"
+          }
+        ],
+        "query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+        "refresh": 2,
+        "type": "interval"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-15m",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "browser",
+  "title": "Docker Host & Container Overview",
+  "version": 1
+}
+}
diff --git a/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json b/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json
new file mode 100644 (file)
index 0000000..da65d4a
--- /dev/null
@@ -0,0 +1,1632 @@
+{
+"dashboard": {
+  "__inputs": [
+    {
+      "name": "Prometheus",
+      "label": "Prometheus",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "prometheus",
+      "pluginName": "Prometheus"
+    }
+  ],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "4.4.3"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "singlestat",
+      "name": "Singlestat",
+      "version": ""
+    },
+    {
+      "type": "panel",
+      "id": "text",
+      "name": "Text",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": []
+  },
+  "description": "Dashboard to view multiple servers",
+  "editable": true,
+  "gnetId": 405,
+  "graphTooltip": 0,
+  "hideControls": false,
+  "id": null,
+  "links": [],
+  "rows": [
+    {
+      "collapse": false,
+      "height": "25px",
+      "panels": [
+        {
+          "content": "",
+          "editable": true,
+          "error": false,
+          "id": 11,
+          "minSpan": 2,
+          "mode": "html",
+          "repeat": "node",
+          "span": 12,
+          "style": {},
+          "title": "$node",
+          "type": "text"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Title",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "25px",
+      "panels": [
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "format": "none",
+          "gauge": {
+            "maxValue": 100,
+            "minValue": 0,
+            "show": false,
+            "thresholdLabels": false,
+            "thresholdMarkers": true
+          },
+          "id": 20,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "repeat": "node",
+          "span": 12,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "count(node_cpu{instance=~\"$node\", mode=\"system\"})",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "",
+              "metric": "",
+              "refId": "A",
+              "step": 14400,
+              "target": ""
+            }
+          ],
+          "thresholds": "",
+          "title": "CPU Cores",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "avg"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 3,
+          "editable": true,
+          "error": false,
+          "fill": 10,
+          "grid": {},
+          "id": 7,
+          "legend": {
+            "alignAsTable": false,
+            "avg": false,
+            "current": false,
+            "hideEmpty": false,
+            "max": false,
+            "min": false,
+            "rightSide": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 0,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": true,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode=\"system\",instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "{{mode}}",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='user',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "user",
+              "refId": "B",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='nice',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "nice",
+              "refId": "C",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='iowait',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "iowait",
+              "refId": "E",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='steal',instance=~'$node'}[5m]))",
+              "intervalFactor": 2,
+              "legendFormat": "steal",
+              "refId": "H",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='idle',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "idle",
+              "refId": "D",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='irq',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "irq",
+              "refId": "F",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='softirq',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "softirq",
+              "refId": "G",
+              "step": 1200
+            },
+            {
+              "expr": "sum by (mode)(irate(node_cpu{mode='guest',instance=~'$node'}[5m]))",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "guest",
+              "refId": "I",
+              "step": 1200
+            }
+          ],
+          "thresholds": [
+            {
+              "colorMode": "custom",
+              "fill": true,
+              "fillColor": "rgba(216, 200, 27, 0.27)",
+              "op": "gt",
+              "value": 0
+            }
+          ],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "CPU",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": "%",
+              "logBase": 1,
+              "max": 100,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "CPU",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {
+            "Slab": "#E5A8E2",
+            "Swap": "#E24D42"
+          },
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 2,
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 17,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [
+            {
+              "alias": "/Apps|Buffers|Cached|Free|Slab|SwapCached|PageTables|VmallocUsed/",
+              "fill": 5,
+              "stack": true
+            },
+            {
+              "alias": "Swap",
+              "fill": 5,
+              "stack": true
+            }
+          ],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "( node_memory_MemTotal{instance=~'$node'} - node_memory_MemFree{instance=~'$node'} - node_memory_Buffers{instance=~'$node'} - node_memory_Cached{instance=~'$node'} - node_memory_SwapCached{instance=~'$node'} - node_memory_Slab{instance=~'$node'} - node_memory_PageTables{instance=~'$node'} - node_memory_VmallocUsed{instance=~'$node'} )",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Apps",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            },
+            {
+              "expr": "node_memory_Buffers{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Buffers",
+              "refId": "B",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_Cached{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Cached",
+              "refId": "D",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_MemFree{instance=~'$node'}",
+              "hide": false,
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Free",
+              "refId": "E",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_Slab{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Slab",
+              "refId": "F",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_SwapCached{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "SwapCached",
+              "refId": "G",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_PageTables{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "PageTables",
+              "refId": "H",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_VmallocUsed{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "VmallocUsed",
+              "metric": "",
+              "refId": "I",
+              "step": 1200
+            },
+            {
+              "expr": "(node_memory_SwapTotal{instance=~'$node'} - node_memory_SwapFree{instance=~'$node'})",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Swap",
+              "metric": "",
+              "refId": "C",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_Committed_AS{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Committed",
+              "metric": "",
+              "refId": "J",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_Mapped{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Mapped",
+              "refId": "K",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_Active{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Active",
+              "metric": "",
+              "refId": "L",
+              "step": 1200
+            },
+            {
+              "expr": "node_memory_Inactive{instance=~'$node'}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Inactive",
+              "metric": "",
+              "refId": "M",
+              "step": 1200
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Memory",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": "GB",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Memory",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 13,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_load1{instance=~\"$node\"}",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "load",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Load",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Load",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "decimals": 3,
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 9,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "100.0 - 100 * (node_filesystem_avail{instance=~'$node',device !~'tmpfs',device!~'by-uuid'} / node_filesystem_size{instance=~'$node',device !~'tmpfs',device!~'by-uuid'})",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "{{mountpoint}}",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk Space Used",
+          "tooltip": {
+            "msResolution": true,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percent",
+              "logBase": 1,
+              "max": 100,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Disk Used",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 19,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_disk_io_time_ms{instance=~\"$node\"}[5m])/10",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "{{device}}",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk Utilization per Device",
+          "tooltip": {
+            "msResolution": false,
+            "shared": false,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percent",
+              "logBase": 1,
+              "max": 100,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Disk Utilization",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 14,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [
+            {
+              "alias": "/.*_read$/",
+              "transform": "negative-Y"
+            }
+          ],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_disk_reads_completed{instance=~'$node'}[5m])",
+              "interval": "",
+              "intervalFactor": 4,
+              "legendFormat": "{{device}}_read",
+              "metric": "",
+              "refId": "A",
+              "step": 2400,
+              "target": ""
+            },
+            {
+              "expr": "irate(node_disk_writes_completed{instance=~'$node'}[5m])",
+              "intervalFactor": 2,
+              "legendFormat": "{{device}}_write",
+              "metric": "",
+              "refId": "B",
+              "step": 1200
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk IOs per Device",
+          "tooltip": {
+            "msResolution": false,
+            "shared": false,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": "IO/second read (-) / write (+)",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Disk IOs per device",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 18,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [
+            {
+              "alias": "/.*_read/",
+              "transform": "negative-Y"
+            }
+          ],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_disk_sectors_read{instance=~'$node'}[5m]) * 512",
+              "interval": "",
+              "intervalFactor": 4,
+              "legendFormat": "{{device}}_read",
+              "refId": "B",
+              "step": 2400
+            },
+            {
+              "expr": "irate(node_disk_sectors_written{instance=~'$node'}[5m]) * 512",
+              "interval": "",
+              "intervalFactor": 4,
+              "legendFormat": "{{device}}_write",
+              "metric": "",
+              "refId": "A",
+              "step": 2400,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk Throughput per Device",
+          "tooltip": {
+            "msResolution": false,
+            "shared": false,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": "Bytes/second read (-) / write (+)",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Disk Throughput per device",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 22,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_context_switches{instance=~\"$node\"}[5m])",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "context switches",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Context Switches",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Context Switches",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 12,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [
+            {
+              "alias": "/.*_in/",
+              "transform": "negative-Y"
+            }
+          ],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_network_receive_bytes{instance=~'$node'}[5m])*8",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "{{device}}_in",
+              "metric": "",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            },
+            {
+              "expr": "irate(node_network_transmit_bytes{instance=~'$node'}[5m])*8",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "{{device}}_out",
+              "refId": "B",
+              "step": 1200
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Network Traffic",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bits",
+              "label": "bits in (-) / bits out (+)",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Network Traffic",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 21,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}",
+              "intervalFactor": 2,
+              "legendFormat": "established",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Netstat",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Netstat",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 23,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [
+            {
+              "alias": "/.*Out.*/",
+              "transform": "negative-Y"
+            },
+            {
+              "alias": "Udp_NoPorts",
+              "yaxis": 2
+            }
+          ],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m])",
+              "intervalFactor": 2,
+              "legendFormat": "Udp_InDatagrams",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            },
+            {
+              "expr": "irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m])",
+              "intervalFactor": 2,
+              "legendFormat": "Udp_InErrors",
+              "refId": "B",
+              "step": 1200
+            },
+            {
+              "expr": "irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m])",
+              "interval": "",
+              "intervalFactor": 2,
+              "legendFormat": "Udp_OutDatagrams",
+              "refId": "C",
+              "step": 1200
+            },
+            {
+              "expr": "irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m])",
+              "intervalFactor": 2,
+              "legendFormat": "Udp_NoPorts",
+              "refId": "D",
+              "step": 1200
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "UDP Stats",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "UDP Stats",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 24,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "minSpan": 2,
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "repeat": "node",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_nf_conntrack_entries_limit{instance=~\"$node\"} - node_nf_conntrack_entries{instance=~\"$node\"}",
+              "intervalFactor": 2,
+              "legendFormat": "free",
+              "refId": "A",
+              "step": 1200,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Conntrack",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Conntrack",
+      "titleSize": "h6"
+    }
+  ],
+  "schemaVersion": 14,
+  "style": "dark",
+  "tags": [
+    "prometheus"
+  ],
+  "templating": {
+    "list": [
+      {
+        "allFormat": "glob",
+        "allValue": null,
+        "current": {},
+        "datasource": "Prometheus",
+        "hide": 0,
+        "includeAll": false,
+        "label": "",
+        "multi": true,
+        "multiFormat": "regex values",
+        "name": "node",
+        "options": [],
+        "query": "label_values(node_boot_time, instance)",
+        "refresh": 1,
+        "regex": "",
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-7d",
+    "to": "now"
+  },
+  "timepicker": {
+    "now": true,
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "browser",
+  "title": "Node Exporter Server Metrics",
+  "version": 1
+}
+}
diff --git a/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json b/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json
new file mode 100644 (file)
index 0000000..5dee4b9
--- /dev/null
@@ -0,0 +1,792 @@
+{
+"dashboard": {
+  "__inputs": [
+    {
+      "name": "Prometheus",
+      "label": "Prometheus",
+      "description": "",
+      "type": "datasource",
+      "pluginId": "prometheus",
+      "pluginName": "Prometheus"
+    }
+  ],
+  "__requires": [
+    {
+      "type": "grafana",
+      "id": "grafana",
+      "name": "Grafana",
+      "version": "4.4.3"
+    },
+    {
+      "type": "panel",
+      "id": "graph",
+      "name": "Graph",
+      "version": ""
+    },
+    {
+      "type": "datasource",
+      "id": "prometheus",
+      "name": "Prometheus",
+      "version": "1.0.0"
+    },
+    {
+      "type": "panel",
+      "id": "singlestat",
+      "name": "Singlestat",
+      "version": ""
+    }
+  ],
+  "annotations": {
+    "list": []
+  },
+  "description": "Dashboard to get an overview of one server",
+  "editable": true,
+  "gnetId": 22,
+  "graphTooltip": 0,
+  "hideControls": false,
+  "id": null,
+  "links": [],
+  "refresh": false,
+  "rows": [
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 3,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)",
+              "hide": false,
+              "intervalFactor": 10,
+              "legendFormat": "{{cpu}}",
+              "refId": "A",
+              "step": 50
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "CPU usage",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percent",
+              "label": "cpu usage",
+              "logBase": 1,
+              "max": 100,
+              "min": 0,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 9,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_load1{instance=~\"$server\"}",
+              "intervalFactor": 4,
+              "legendFormat": "load 1m",
+              "refId": "A",
+              "step": 20,
+              "target": ""
+            },
+            {
+              "expr": "node_load5{instance=~\"$server\"}",
+              "intervalFactor": 4,
+              "legendFormat": "load 5m",
+              "refId": "B",
+              "step": 20,
+              "target": ""
+            },
+            {
+              "expr": "node_load15{instance=~\"$server\"}",
+              "intervalFactor": 4,
+              "legendFormat": "load 15m",
+              "refId": "C",
+              "step": 20,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "System load",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percentunit",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 4,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [
+            {
+              "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}",
+              "yaxis": 2
+            }
+          ],
+          "spaceLength": 10,
+          "span": 10,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_memory_MemTotal{instance=~\"$server\"} - node_memory_MemFree{instance=~\"$server\"}",
+              "intervalFactor": 2,
+              "legendFormat": "free memory",
+              "metric": "memo",
+              "refId": "A",
+              "step": 10,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Free memory",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "format": "percent",
+          "gauge": {
+            "maxValue": 100,
+            "minValue": 0,
+            "show": true,
+            "thresholdLabels": false,
+            "thresholdMarkers": true
+          },
+          "id": 5,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "span": 2,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "(node_memory_MemFree{instance=~\"$server\"} / node_memory_MemTotal{instance=~\"$server\"}) * 100",
+              "intervalFactor": 2,
+              "refId": "A",
+              "step": 60,
+              "target": ""
+            }
+          ],
+          "thresholds": "10, 20",
+          "title": "Free memory",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "avg"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 6,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [
+            {
+              "alias": "read",
+              "yaxis": 1
+            },
+            {
+              "alias": "{instance=\"172.17.0.1:9100\"}",
+              "yaxis": 2
+            },
+            {
+              "alias": "io time",
+              "yaxis": 2
+            }
+          ],
+          "spaceLength": 10,
+          "span": 10,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "sum by (instance) (irate(node_disk_bytes_read{instance=~\"$server\"}[5m]))",
+              "hide": false,
+              "intervalFactor": 4,
+              "legendFormat": "read",
+              "refId": "A",
+              "step": 20,
+              "target": ""
+            },
+            {
+              "expr": "sum by (instance) (irate(node_disk_bytes_written{instance=~\"$server\"}[5m]))",
+              "intervalFactor": 4,
+              "legendFormat": "written",
+              "refId": "B",
+              "step": 20
+            },
+            {
+              "expr": "sum by (instance) (irate(node_disk_io_time_ms{instance=~\"$server\"}[5m]))",
+              "intervalFactor": 4,
+              "legendFormat": "io time",
+              "refId": "C",
+              "step": 20
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk usage",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "ms",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "format": "percentunit",
+          "gauge": {
+            "maxValue": 1,
+            "minValue": 0,
+            "show": true,
+            "thresholdLabels": false,
+            "thresholdMarkers": true
+          },
+          "id": 7,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "span": 2,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "min(node_filesystem_free{device!=\"rootfs\",instance=~\"$server\"} / node_filesystem_size{device!=\"rootfs\",instance=~\"$server\"})",
+              "intervalFactor": 2,
+              "refId": "A",
+              "step": 60,
+              "target": ""
+            }
+          ],
+          "thresholds": "0.10, 0.25",
+          "title": "Free disk space (lowest mountpoint)",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "current"
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": "Prometheus",
+          "editable": true,
+          "error": false,
+          "fill": 1,
+          "grid": {},
+          "id": 8,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 2,
+          "links": [],
+          "nullPointMode": "connected",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [
+            {
+              "alias": "transmitted ",
+              "yaxis": 2
+            }
+          ],
+          "spaceLength": 10,
+          "span": 12,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_network_receive_bytes{instance=~\"$server\",device!~\"lo\"}[5m])",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "received",
+              "refId": "A",
+              "step": 4,
+              "target": ""
+            },
+            {
+              "expr": "irate(node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}[5m])",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "transmitted ",
+              "refId": "B",
+              "step": 4,
+              "target": ""
+            },
+            {
+              "expr": "node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "transmitted ",
+              "refId": "C",
+              "step": 2,
+              "target": ""
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Data transfer",
+          "tooltip": {
+            "msResolution": false,
+            "shared": true,
+            "sort": 0,
+            "value_type": "cumulative"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "New row",
+      "titleSize": "h6"
+    }
+  ],
+  "schemaVersion": 14,
+  "style": "dark",
+  "tags": [
+    "prometheus"
+  ],
+  "templating": {
+    "list": [
+      {
+        "allValue": null,
+        "current": {},
+        "datasource": "Prometheus",
+        "hide": 0,
+        "includeAll": false,
+        "label": null,
+        "multi": false,
+        "name": "server",
+        "options": [],
+        "query": "label_values(node_boot_time, instance)",
+        "refresh": 1,
+        "regex": "",
+        "sort": 0,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "browser",
+  "title": "Node exporter single server",
+  "version": 1
+}
+}
diff --git a/tools/prometheus/prometheus-tools.sh b/tools/prometheus/prometheus-tools.sh
new file mode 100644 (file)
index 0000000..ed6eb22
--- /dev/null
@@ -0,0 +1,228 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Functions for testing with Prometheus and Grafana. Sets up
+#.   Prometheus and Grafana on a master node (e.g. for kubernetes, docker, 
+#.   rancher, openstack) and agent nodes (where applications run).
+#. Prerequisites: 
+#. - Ubuntu server for master and agent nodes
+#. - Docker installed
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/prometheus
+#. $ bash prometheus-tools.sh setup "<list of agent nodes>"
+#. <list of agent nodes>: space separated IP of agent nodes
+#. $ bash prometheus-tools.sh grafana
+#.   Runs grafana in a docker container and connects to prometheus as datasource
+#. $ bash prometheus-tools.sh all "<list of agent nodes>"
+#.   Does all of the above
+#. $ bash prometheus-tools.sh clean "<list of agent nodes>"
+#
+
+# Prometheus links
+# https://prometheus.io/download/
+# https://prometheus.io/docs/introduction/getting_started/
+# https://github.com/prometheus/prometheus
+# https://prometheus.io/docs/instrumenting/exporters/
+# https://github.com/prometheus/node_exporter
+# https://github.com/prometheus/haproxy_exporter
+# https://github.com/prometheus/collectd_exporter
+
+# Abort helper. Usage: fail "<message>"
+# e.g. if [ "$RESULT" == "Test Failed!" ]; then fail "message"; fi
+# Echoes the message to stdout and terminates the script with status 1.
+fail() {
+  echo "$1"; exit 1
+}
+
+function setup_prometheus() {
+  # Installs and starts the Prometheus server on this host, copies and starts
+  # node_exporter and haproxy_exporter on each agent in the global $nodes (space
+  # separated IPs), then lists the exporters reported by the Prometheus API.
+  echo "${FUNCNAME[0]}: Setting up prometheus master and agents"
+  sudo apt install -y golang-go jq
+
+  echo "${FUNCNAME[0]}: Setting up prometheus master"
+  if [[ -d ~/prometheus ]]; then rm -rf ~/prometheus; fi
+  mkdir -p ~/prometheus/dashboards
+  # Dashboards live next to this script; do not depend on the caller's cwd
+  cp -r "$(dirname "${BASH_SOURCE[0]}")"/dashboards/* ~/prometheus/dashboards
+  cd ~/prometheus
+  wget https://github.com/prometheus/prometheus/releases/download/v2.0.0-beta.2/prometheus-2.0.0-beta.2.linux-amd64.tar.gz
+  tar xvfz prometheus-*.tar.gz
+  cd prometheus-*
+  # Customize prometheus.yml below for your server IPs
+  # This example assumes the node_exporter and haproxy_exporter will be installed on each node
+  cat <<'EOF' >prometheus.yml
+global:
+  scrape_interval:     15s # By default, scrape targets every 15 seconds.
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'codelab-monitor'
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    static_configs:
+EOF
+
+  for node in $nodes; do
+    echo "      - targets: ['${node}:9100']" >>prometheus.yml
+    echo "      - targets: ['${node}:9101']" >>prometheus.yml
+  done
+
+  # Start Prometheus; its UI is then at http://host_ip:9090
+  nohup ./prometheus --config.file=prometheus.yml > /dev/null 2>&1 &
+
+  echo "${FUNCNAME[0]}: Installing exporters"
+  # https://github.com/prometheus/node_exporter
+  cd ~/prometheus
+  wget https://github.com/prometheus/node_exporter/releases/download/v0.14.0/node_exporter-0.14.0.linux-amd64.tar.gz
+  tar xvfz node*.tar.gz
+  # https://github.com/prometheus/haproxy_exporter
+  wget https://github.com/prometheus/haproxy_exporter/releases/download/v0.7.1/haproxy_exporter-0.7.1.linux-amd64.tar.gz
+  tar xvfz haproxy*.tar.gz
+
+  # The scp and ssh actions below assume you have key-based access enabled to the nodes
+  for node in $nodes; do
+    echo "${FUNCNAME[0]}: Setup agent at $node"
+    scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      node_exporter-0.14.0.linux-amd64/node_exporter "ubuntu@$node:/home/ubuntu/node_exporter"
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      "ubuntu@$node" "nohup ./node_exporter > /dev/null 2>&1 &"
+    scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      haproxy_exporter-0.7.1.linux-amd64/haproxy_exporter "ubuntu@$node:/home/ubuntu/haproxy_exporter"
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      "ubuntu@$node" "nohup ./haproxy_exporter > /dev/null 2>&1 &"
+  done
+
+  # Use the address after "src"; the last field is "uid N" on newer iproute2
+  host_ip=$(ip route get 8.8.8.8 | awk '{for (i=1; i<NF; i++) if ($i == "src") {print $(i+1); exit}}')
+  while ! curl -o /tmp/up "http://$host_ip:9090/api/v1/query?query=up" ; do
+    echo "${FUNCNAME[0]}: Prometheus API is not yet responding... waiting 10 seconds"
+    sleep 10
+  done
+
+  exp=$(jq '.data.result|length' /tmp/up)
+  echo "${FUNCNAME[0]}: $exp exporters are up"
+  while (( exp > 0 )); do
+    ((exp--))
+    eip=$(jq -r ".data.result[$exp].metric.instance" /tmp/up)
+    job=$(jq -r ".data.result[$exp].metric.job" /tmp/up)
+    echo "${FUNCNAME[0]}: $job at $eip"
+  done
+  echo "${FUNCNAME[0]}: Prometheus dashboard is available at http://$host_ip:9090"
+  echo "Prometheus dashboard is available at http://$host_ip:9090" >/tmp/summary
+}
+
+function connect_grafana() {
+  # Usage: connect_grafana <prometheus_ip> <grafana_ip>
+  # Waits for the Grafana API, registers Prometheus as the default datasource,
+  # and imports every dashboard file found in ~/prometheus/dashboards.
+  echo "${FUNCNAME[0]}: Setup Grafana datasources and dashboards"
+  prometheus_ip=$1
+  grafana_ip=$2
+  while ! curl -X POST "http://admin:admin@$grafana_ip:3000/api/login/ping" ; do
+    echo "${FUNCNAME[0]}: Grafana API is not yet responding... waiting 10 seconds"
+    sleep 10
+  done
+
+  echo "${FUNCNAME[0]}: Setup Prometheus datasource for Grafana"
+  cd ~/prometheus/
+  cat >datasources.json <<EOF
+{"name":"Prometheus", "type":"prometheus", "access":"proxy", \
+"url":"http://$prometheus_ip:9090/", "basicAuth":false,"isDefault":true, \
+"user":"", "password":"" }
+EOF
+  curl -X POST -o /tmp/json -u admin:admin -H "Accept: application/json" \
+    -H "Content-type: application/json" \
+    -d @datasources.json "http://admin:admin@$grafana_ip:3000/api/datasources"
+  if [[ "$(jq -r '.message' /tmp/json)" != "Datasource added" ]]; then
+    fail "Datasource creation failed"
+  fi
+  echo "${FUNCNAME[0]}: Prometheus datasource for Grafana added"
+
+  echo "${FUNCNAME[0]}: Import Grafana dashboards"
+  # https://grafana.com/dashboards?dataSource=prometheus
+  # To add additional dashboards, browse the URL above and import the dashboard via the id displayed for the dashboard
+  # Select the home icon (upper left), Dashboards / Import, enter the id, select load, and select the Prometheus datasource
+
+  cd ~/prometheus/dashboards || fail "Dashboard folder ~/prometheus/dashboards not found"
+  for board in *; do
+    [[ -f "$board" ]] || continue  # also skips the literal '*' of an empty folder
+    curl -X POST -u admin:admin -H "Accept: application/json" -H "Content-type: application/json" -d @"${board}" "http://$grafana_ip:3000/api/dashboards/db"
+  done
+  echo "${FUNCNAME[0]}: Grafana dashboards are available at http://$grafana_ip:3000 (login as admin/admin)"
+  echo "Grafana dashboards are available at http://$grafana_ip:3000 (login as admin/admin)" >>/tmp/summary
+  echo "${FUNCNAME[0]}: Grafana API is available at http://admin:admin@$grafana_ip:3000/api/v1/query?query=<string>"
+  echo "Grafana API is available at http://admin:admin@$grafana_ip:3000/api/v1/query?query=<string>" >>/tmp/summary
+}
+
+function run_and_connect_grafana() {
+  # Runs Grafana in docker (per http://docs.grafana.org/installation/docker/) and
+  # connects it to Prometheus. host_ip is the "src" field ($NF may be "uid N").
+  host_ip=$(ip route get 8.8.8.8 | awk '{for (i=1; i<NF; i++) if ($i == "src") {print $(i+1); exit}}')
+  sudo docker run -d -p 3000:3000 --name grafana grafana/grafana
+  status=$(sudo docker inspect grafana | jq -r '.[0].State.Status')
+  while [[ "x$status" != "xrunning" ]]; do
+    echo "${FUNCNAME[0]}: Grafana container state is ($status)"
+    sleep 10
+    status=$(sudo docker inspect grafana | jq -r '.[0].State.Status')
+  done
+  echo "${FUNCNAME[0]}: Grafana container state is $status"
+  connect_grafana "$host_ip" "$host_ip"
+  echo "${FUNCNAME[0]}: connect_grafana complete"
+}
+
+# Command dispatch: $1 selects the action, $2 is the space separated agent list
+nodes=$2
+case "$1" in
+  setup)
+    setup_prometheus "$2"
+    ;;
+  grafana)
+    run_and_connect_grafana
+    ;;
+  all)
+    setup_prometheus "$2"
+    run_and_connect_grafana
+    ;;
+  clean)
+    sudo kill $(ps -ef | grep "\./prometheus" | grep prometheus.yml | awk '{print $2}')
+    rm -rf ~/prometheus
+    sudo docker stop grafana
+    sudo docker rm grafana
+    for node in $nodes; do
+      # Match the exporters on the agent itself: a $(...) inside double quotes
+      # is expanded locally, and [.] keeps pkill from matching this command line
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        "ubuntu@$node" "sudo pkill -f '[.]/node_exporter'; rm -rf /home/ubuntu/node_exporter"
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        "ubuntu@$node" "sudo pkill -f '[.]/haproxy_exporter'; rm -rf /home/ubuntu/haproxy_exporter"
+    done
+    ;;
+  *)
+    grep '#. ' "$0"
+esac
+# The summary file is only written by the setup/grafana/all actions
+if [[ -f /tmp/summary ]]; then cat /tmp/summary; fi
diff --git a/tools/rancher/demo_deploy.sh b/tools/rancher/demo_deploy.sh
new file mode 100644 (file)
index 0000000..981b421
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Complete scripted deployment of an experimental Rancher-based
+#. cloud-native application platform. When complete, Rancher and the following
+#. will be installed:
+#. - nginx and dokuwiki as demo applications
+#. - prometheus + grafana for cluster monitoring/stats
+#.   Prometheus dashboard: http://<master_public_ip>:9090
+#.   Grafana dashboard: http://<master_public_ip>:3000
+#. 
+#. Prerequisites:
+#. - Ubuntu server for Rancher cluster nodes (admin/master and agent nodes)
+#. - MAAS server as cluster admin for Rancher master/agent nodes
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ bash ~/models/tools/rancher/demo_deploy.sh <key> "<hosts>" <master_ip>
+#.     "<agent_ips>" [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <master_ip>: IP of cluster admin node
+#. <agent_ips>: space separated list of agent node IPs
+#. <extras>: optional name of script for extra setup functions as needed
+
+key=$1          # name of the private key file (in the current folder)
+nodes="$2"      # space separated list of hostnames managed by MAAS
+admin_ip=$3     # IP of the cluster admin (master) node
+agent_ips="$4"  # space separated list of agent node IPs
+extras=$5       # optional script sourced below for extra setup functions
+
+source ~/models/tools/maas/deploy.sh $1 "$2" $5
+eval `ssh-agent`
+ssh-add $key
+if [[ "x$extras" != "x" ]]; then source $extras; fi
+scp -o StrictHostKeyChecking=no $key ubuntu@$admin_ip:/home/ubuntu/$key
+echo "Setting up Rancher..."
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+git clone https://gerrit.opnfv.org/gerrit/models
+bash models/tools/rancher/rancher-cluster.sh all "$agent_ips"
+EOF
+# TODO: Figure this out... Have to break the setup into two steps as something
+# causes the ssh session to end before the prometheus setup, if both scripts
+# (rancher-cluster and prometheus-tools) are in the same ssh session
+echo "Setting up Prometheus..."
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+bash models/tools/prometheus/prometheus-tools.sh all "$agent_ips"
+EOF
+echo "All done!"
diff --git a/tools/rancher/rancher-cluster.sh b/tools/rancher/rancher-cluster.sh
new file mode 100644 (file)
index 0000000..42b3c58
--- /dev/null
@@ -0,0 +1,529 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#  
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#  
+# http://www.apache.org/licenses/LICENSE-2.0
+#  
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Functions for testing with rancher. 
+#. Prerequisites: 
+#. - Ubuntu server for master and agent nodes
+#. Setup:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/rancher
+#.
+#. Usage:
+#. $ bash rancher-cluster.sh all "<agents>"
+#.   Automate setup and start demo blueprints.
+#.   <agents>: space-separated list of agent node IPs
+#. $ bash rancher-cluster.sh setup "<agents>"
+#.   Installs and starts master and agent nodes.
+#. $ bash rancher-cluster.sh master
+#.   Setup the Rancher master node.
+#. $ bash rancher-cluster.sh agents "<agents>"
+#.   Installs and starts agent nodes.
+#. $ bash rancher-cluster.sh demo
+#.   Start demo blueprints.
+#. $ bash rancher-cluster.sh clean "<agents>"
+#.   Removes Rancher and installed blueprints from the master and agent nodes.
+#.
+#. To call the procedures directly, e.g. public_endpoint nginx/lb:
+#. $ source rancher-cluster.sh
+#. See below for function-specific usage
+#.
+
+# Install master
+function setup_master() {
+  docker_installed=$(dpkg-query -W --showformat='${Status}\n' docker-ce | grep -c "install ok")
+  if [[ $docker_installed == 0 ]]; then
+    echo "${FUNCNAME[0]}: installing and starting docker"
+    # Per https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/
+    sudo apt-get remove -y docker docker-engine docker.io
+    sudo apt-get update
+    sudo apt-get install -y \
+      linux-image-extra-$(uname -r) \
+      linux-image-extra-virtual
+    sudo apt-get install -y \
+      apt-transport-https \
+      ca-certificates \
+      curl \
+      software-properties-common
+    curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+    sudo add-apt-repository \
+     "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+     $(lsb_release -cs) \
+     stable"
+    sudo apt-get update
+    sudo apt-get install -y docker-ce
+
+    echo "${FUNCNAME[0]}: installing jq"
+    sudo apt-get install -y jq
+  fi
+
+  echo "${FUNCNAME[0]}: installing rancher server (master)"
+  sudo docker run -d --restart=unless-stopped -p 8080:8080 --name rancher rancher/server
+
+  echo "${FUNCNAME[0]}: wait until server is up at http://$1:8080"
+  delay=0
+  id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id')
+  while [[ "$id" == "" ]]; do
+    echo "${FUNCNAME[0]}: rancher server is not yet up, checking again in 10 seconds"
+    sleep 10
+    let delay=$delay+10
+    id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id')
+  done
+  echo "${FUNCNAME[0]}: rancher server is up after $delay seconds"
+
+  rm -rf ~/rancher 
+  mkdir ~/rancher 
+}     
+
+# Install rancher CLI tools (rancher, rancher-compose, docker-compose), create
+# API keys and a host registration token on the master, and export the
+# resulting settings for use by the other functions in this file.
+# Usage example: install_cli_tools 172.16.0.2
+#   $1: IP address of the Rancher master (API is addressed at port 8080)
+# Exports: RANCHER_URL, RANCHER_ENVIRONMENT, RANCHER_ACCESS_KEY,
+#          RANCHER_SECRET_KEY, RANCHER_REGISTER_COMMAND
+# Leaves the current directory at ~/rancher.
+function install_cli_tools() {
+  echo "${FUNCNAME[0]}: installing rancher CLI tools for master $1"
+  cd ~
+  echo "${FUNCNAME[0]}: install Rancher CLI"
+  rm -rf rancher-v0.6.3
+  wget -q https://releases.rancher.com/cli/v0.6.3/rancher-linux-amd64-v0.6.3.tar.gz
+  gzip -d -f rancher-linux-amd64-v0.6.3.tar.gz
+  tar -xvf rancher-linux-amd64-v0.6.3.tar
+  sudo mv rancher-v0.6.3/rancher /usr/bin/rancher
+  echo "${FUNCNAME[0]}: install Rancher Compose"
+  rm -rf rancher-compose-v0.12.5
+  wget -q https://releases.rancher.com/compose/v0.12.5/rancher-compose-linux-amd64-v0.12.5.tar.gz
+  gzip -d -f rancher-compose-linux-amd64-v0.12.5.tar.gz
+  tar -xvf rancher-compose-linux-amd64-v0.12.5.tar
+  sudo mv rancher-compose-v0.12.5/rancher-compose /usr/bin/rancher-compose
+  echo "${FUNCNAME[0]}: setup Rancher CLI environment"
+  # CLI setup http://rancher.com/docs/rancher/v1.6/en/cli/
+  # Under the UI "API" select "Add account API key" and name it. Export the keys:
+  # The following scripted approach assumes you have 1 project/environment (Default)
+  # Set the url that Rancher is on
+  export RANCHER_URL=http://$1:8080/v1
+  # The first project returned by the API is used as the environment
+  id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id')
+  export RANCHER_ENVIRONMENT=$id
+  # Create an API key pair; the JSON response is saved in /tmp/keys.
+  # NOTE(review): /tmp/keys then holds the API secret in a predictable path.
+  curl -s -o /tmp/keys -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"accountId":"reference[account]", "description":"string", "name":"string", "publicValue":"string", "secretValue":"password"}' http://$1:8080/v2-beta/projects/$id/apikeys
+#  curl -s -o /tmp/keys -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' -d {"type":"apikey","accountId":"1a1","name":"admin","description":null,"created":null,"kind":null,"removed":null,"uuid":null} http://$1:8080/v2-beta/projects/$id/apikey
+  export RANCHER_ACCESS_KEY=$(jq -r '.publicValue' /tmp/keys)
+  export RANCHER_SECRET_KEY=$(jq -r '.secretValue' /tmp/keys)
+  # create the env file ~/.rancher/cli.json
+  # (URL, access key and secret key are fed to the prompts via stdin)
+  rancher config <<EOF
+$RANCHER_URL
+$RANCHER_ACCESS_KEY
+$RANCHER_SECRET_KEY
+EOF
+  
+  # Third '/'-separated field of the configured URL, i.e. <ip>:8080
+  master=$(rancher config --print | jq -r '.url' | cut -d '/' -f 3) 
+  echo "${FUNCNAME[0]}: Create registration token"
+  # added sleep to allow server time to be ready to create registration tokens (otherwise error is returned)
+  sleep 5
+  curl -s -o /tmp/token -X POST -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"name":"master"}' http://$master/v1/registrationtokens 
+  # Retry every 5 seconds until the response is a registrationToken object
+  while [[ $(jq -r ".type" /tmp/token) != "registrationToken" ]]; do
+    sleep 5
+    curl -s -o /tmp/token -X POST -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"name":"master"}' http://$master/v1/registrationtokens 
+  done
+  # From here on, id is the registration token id (not the project id)
+  id=$(jq -r ".id" /tmp/token)
+  echo "${FUNCNAME[0]}: registration token id=$id"
+
+  echo "${FUNCNAME[0]}: wait until registration command is created"
+  # The token's "command" attribute is null until the server has generated it
+  command=$(curl -s -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' http://$master/v1/registrationtokens/$id | jq -r '.command')
+  while [[ "$command" == "null" ]]; do
+    echo "${FUNCNAME[0]}: registration command is not yet created, checking again in 10 seconds"
+    sleep 10
+    command=$(curl -s -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' http://$master/v1/registrationtokens/$id | jq -r '.command')
+  done
+
+  # Used by setup_agent to register agent hosts
+  export RANCHER_REGISTER_COMMAND="$command"
+
+#  echo "${FUNCNAME[0]}: activate rancher debug"
+#  export RANCHER_CLIENT_DEBUG=true
+
+  echo "${FUNCNAME[0]}: Install docker-compose for syntax checks"
+  sudo apt install -y docker-compose
+
+  cd ~/rancher
+}
+
+# Start an agent host
+# Usage example: start_host Default 172.16.0.7
+function setup_agent() {
+  echo "${FUNCNAME[0]}: SSH to host $2 in env $1 and execute registration command"
+
+  ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$2 "sudo apt-get install -y docker.io; sudo service docker start"
+  ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$2 $RANCHER_REGISTER_COMMAND
+
+  echo "${FUNCNAME[0]}: wait until agent $2 is active"
+  delay=0
+  id=$(rancher hosts | awk "/$2/{print \$1}")
+  while [[ "$id" == "" ]]; do
+    echo "${FUNCNAME[0]}: agent $2 is not yet created, checking again in 10 seconds"
+    sleep 10
+    let delay=$delay+10
+    id=$(rancher hosts | awk "/$2/{print \$1}")
+  done
+
+  echo "${FUNCNAME[0]}: agent $2 id=$id"
+  state=$(rancher inspect $id | jq -r '.state')
+  while [[ "$state" != "active" ]]; do
+    echo "${FUNCNAME[0]}: host $2 state is $state, checking again in 10 seconds"
+    sleep 10
+    let delay=$delay+10
+    state=$(rancher inspect $id | jq -r '.state')
+  done
+  echo "${FUNCNAME[0]}: agent $2 state is $state after $delay seconds"  
+}
+
+# Delete an agent host
+# Usage example: delete_host 172.16.0.7
+function stop_agent() {
+  echo "${FUNCNAME[0]}: deleting host $1"
+  rancher rm --stop $(rancher hosts | awk "/$1/{print \$1}") 
+}
+
+# Test service at access points
+# Usage example: check_service nginx/nginx http "Welcome to nginx!"
+function check_service() {
+  echo "${FUNCNAME[0]}: checking service state for $1 over $2 with match string $3"
+  service=$1
+  scheme=$2
+  match="$3"
+  id=$(rancher ps | grep " $service " | awk "{print \$1}")
+  n=0
+  while [[ "$(rancher inspect $id | jq -r ".publicEndpoints[$n].ipAddress")" != "null" ]]; do
+    ip=$(rancher inspect $id | jq -r ".publicEndpoints[$n].ipAddress")
+    port=$(rancher inspect $id | jq -r ".publicEndpoints[$n].port")
+    while [[ $(wget -qO- $scheme://$ip:$port | grep -c "$match") == 0 ]]; do
+      echo "$service service is NOT active at address $scheme://$ip:$port, waiting 10 seconds"
+      sleep 10
+    done
+    echo "$service service is active at address $scheme://$ip:$port"
+    let n=$n+1
+  done
+}
+
+# Wait n 10-second tries for service to be active
+# Usage example: wait_till_healthy nginx/nginx 6
+function wait_till_healthy() {
+  service=$1
+  tries=$2
+
+  let delay=$tries*10
+  echo "${FUNCNAME[0]}: waiting for service $service to be ready in $delay seconds"
+  id=$(rancher ps | grep " $service " | awk "{print \$1}")
+  health=$(rancher inspect $id | jq -r ".healthState")
+  state=$(rancher inspect $id | jq -r ".state")
+  while [[ $tries > 0 && "$health" != "healthy" ]]; do
+    health=$(rancher inspect $id | jq -r ".healthState")
+    echo $service is $health
+    sleep 10
+  done
+  echo $service state is $(rancher inspect $id | jq -r ".state")
+}
+
+# Start service based upon docker image and simple templates
+# Usage example: start_simple_service nginx nginx:latest 8081:80 3
+# Usage example: start_simple_service dokuwiki ununseptium/dokuwiki-docker 8082:80 2
+function start_simple_service() {
+  echo "${FUNCNAME[0]}: starting service $1 with image $2, ports $3, and scale $4"
+  service=$1
+  image=$2
+  # port is either a single (unexposed) port, or an source:target pair (source
+  # is the external port)
+  ports=$3
+  scale=$4
+  echo "${FUNCNAME[0]}: creating service folder ~/rancher/$service"
+  mkdir ~/rancher/$service
+  cd  ~/rancher/$service
+  echo "${FUNCNAME[0]}: creating docker-compose.yml"
+  # Define service via docker-compose.yml
+  cat <<EOF >docker-compose.yml
+version: '2'
+services:
+  $service:
+    image: $image
+    ports:
+      - "$ports"
+EOF
+
+  echo "${FUNCNAME[0]}: syntax checking docker-compose.yml"
+  docker-compose -f docker-compose.yml config
+
+  echo "${FUNCNAME[0]}: creating rancher-compose.yml"
+  cat <<EOF >rancher-compose.yml
+version: '2'
+services:
+  # Reference the service that you want to extend
+  $service:
+    scale: $scale
+EOF
+
+  echo "${FUNCNAME[0]}: starting service $service"
+  rancher up -s $service -d
+
+  wait_till_healthy "$service/$service" 6
+  cd  ~/rancher
+}
+
+# Add load balancer to a service
+# Usage example (as called by demo): lb_service nginx 8001 80
+# Usage example (as called by demo): lb_service dokuwiki 8002 80
+#   $1: service (and stack) name; its folder ~/rancher/<service> must exist
+#   $2: load balancer port (written as the lb port and the rule source_port)
+#   $3: port written as the rule target_port for <service>/<service>
+# Leaves the current directory at ~/rancher.
+function lb_service() {
+  echo "${FUNCNAME[0]}: adding load balancer port $2 to service $1, port $3"
+  service=$1
+  lbport=$2
+  port=$3
+  cd  ~/rancher/$service
+  echo "${FUNCNAME[0]}: creating docker-compose-lb.yml"
+  # Define lb service via docker-compose.yml
+  cat <<EOF >docker-compose-lb.yml
+version: '2'
+services:
+  lb:
+    ports:
+    - $lbport
+    image: rancher/lb-service-haproxy:latest
+EOF
+
+  echo "${FUNCNAME[0]}: syntax checking docker-compose-lb.yml"
+  docker-compose -f docker-compose-lb.yml config
+
+  echo "${FUNCNAME[0]}: creating rancher-compose-lb.yml"
+  # NOTE(review): health_check port 42 is presumably the health port of the
+  # rancher haproxy image - confirm against the Rancher load balancer docs.
+  cat <<EOF >rancher-compose-lb.yml
+version: '2'
+services:
+  lb:
+    scale: 1
+    lb_config:
+      port_rules:
+      - source_port: $lbport
+        target_port: $port
+        service: $service/$service
+    health_check:
+      port: 42
+      interval: 2000
+      unhealthy_threshold: 3
+      healthy_threshold: 2
+      response_timeout: 2000
+EOF
+
+  echo "${FUNCNAME[0]}: starting service lb"
+  # The lb service is added to the existing stack named after the service
+  rancher up -s $service -d --file docker-compose-lb.yml --rancher-file rancher-compose-lb.yml
+
+  wait_till_healthy "$service/lb" 6
+  cd  ~/rancher
+}
+
+# Change scale of a service
+# Usage example: scale_service nginx 1
+function scale_service() {
+  echo "${FUNCNAME[0]}: scaling service $1 to $2 instances"
+  id=$(rancher ps | grep " $1 " | awk '{print $1}')
+  rancher scale $id=$2
+
+  scale=$(rancher inspect $id | jq -r '.currentScale')
+  health=$(rancher inspect $id | jq -r '.healthState')
+  while [[ $scale != $2 || "$health" != "healthy" ]]; do
+    echo $service is scaled at $scale and is $health
+    scale=$(rancher inspect $id | jq -r '.currentScale')
+    health=$(rancher inspect $id | jq -r '.healthState')
+    sleep 10
+  done
+  echo $service is scaled at $scale and is $health
+}
+
+# Get public endpoint for a service
+# Usage example public_endpoint nginx/lb
+function public_endpoint() {
+    id=$(rancher ps | grep " $1 " | awk "{print \$1}")
+    ip=$(rancher inspect $id | jq -r ".publicEndpoints[0].ipAddress")
+    port=$(rancher inspect $id | jq -r ".publicEndpoints[0].port")
+    echo "${FUNCNAME[0]}: $1 is accessible at http://$ip:$port"
+}
+
+# Stop a stack
+# Usage example: stop_stack nginx
+function stop_stack() {
+  echo "${FUNCNAME[0]}: stopping stack $1"
+  rancher stop $(rancher stacks | awk "/$1/{print \$1}")
+}
+
+# Start a stopped stack
+# Usage example: start_stack nginx
+function start_stack() {
+  echo "${FUNCNAME[0]}: starting stack $1"
+  rancher start $(rancher stacks | awk "/$1/{print \$1}")
+  wait_till_healthy $1 6
+}
+
+# Delete a stack
+# Usage example: delete_stack dokuwiki
+function delete_stack() {
+  id=$(rancher stacks | grep "$1" | awk "{print \$1}")
+  echo "${FUNCNAME[0]}: deleting stack $1 with id $id"
+  rancher rm --stop $id 
+}
+
+# Delete a service
+# Usage example: delete_service nginx/lb
+function delete_service() {
+  id=$(rancher ps | grep "$1" | awk "{print \$1}")
+  echo "${FUNCNAME[0]}: deleting service $1 with id $id"
+  rancher rm --stop $id 
+}
+
+# Start a complex service, i.e. with yaml file customizations
+# Usage example: start_complex_service grafana 3000:3000 1
+#   $1: service (and stack) name; only "grafana" has a template below
+#   $2: port, or source:target port pair
+#   $3: scale; stored in $scale but not written to any file in this function
+# Leaves the current directory at ~/rancher.
+function start_complex_service() {
+  echo "${FUNCNAME[0]}: starting service $1 at ports $2, and scale $3"
+  service=$1
+  # port is either a single (unexposed) port, or an source:target pair (source
+  # is the external port)
+  ports=$2
+  scale=$3
+  echo "${FUNCNAME[0]}: creating service folder ~/rancher/$service"
+  mkdir ~/rancher/$service
+  cd  ~/rancher/$service
+  echo "${FUNCNAME[0]}: creating docker-compose.yml"
+  # Define service via docker-compose.yml
+  # NOTE(review): the grafana admin credentials are hard-coded in the template
+  # (admin/password); the secret key is generated with uuidgen on each call.
+  case "$service" in
+    grafana)
+      cat <<EOF >docker-compose.yml
+grafana:
+    image: grafana/grafana:latest
+    ports:
+        - $ports
+    environment:
+        GF_SECURITY_ADMIN_USER: "admin"
+        GF_SECURITY_ADMIN_PASSWORD: "password"
+        GF_SECURITY_SECRET_KEY: $(uuidgen)
+EOF
+    ;;
+         
+    *)
+    # Any other service name: no docker-compose.yml is generated here
+  esac
+
+  echo "${FUNCNAME[0]}: starting service $service"
+  rancher up -s $service -d
+
+  wait_till_healthy "$service/$service" 6
+  cd  ~/rancher
+}
+
+# Automated demo
+# Usage example: rancher_demo start "172.16.0.7 172.16.0.8 172.16.0.9"
+# Usage example: rancher_demo clean "172.16.0.7 172.16.0.8 172.16.0.9"
+function demo() {
+  # Deploy apps
+  # Nginx web server, accessible on each machine port 8081, and via load
+  # balancer port 8001
+  start=`date +%s`
+  setup "$1"
+  start_simple_service nginx nginx:latest 8081:80 3
+  check_service nginx/nginx http "Welcome to nginx!"
+  lb_service nginx 8001 80
+  check_service nginx/lb http "Welcome to nginx!"
+  # Dokuwiki server, accessible on each machine port 8082, and via load
+  # balancer port 8002
+  start_simple_service dokuwiki ununseptium/dokuwiki-docker 8082:80 2
+  check_service dokuwiki/dokuwiki http "This topic does not exist yet"
+  lb_service dokuwiki 8002 80
+  check_service dokuwiki/lb http "This topic does not exist yet"
+  # Grafana server, accessible on one machine at port 3000
+  start_complex_service grafana 3000:3000 1
+  id=$(rancher ps | grep " grafana/grafana " | awk "{print \$1}")
+  source ~/models/tools/prometheus/prometheus-tools.sh setup "$agents"
+  grafana_ip=$(rancher inspect $id | jq -r ".publicEndpoints[0].ipAddress")
+  prometheus_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
+  connect_grafana $prometheus_ip $grafana_ip
+  public_endpoint nginx/lb
+  public_endpoint dokuwiki/lb
+  public_endpoint grafana/grafana
+
+  end=`date +%s`
+  runtime=$((end-start))
+  runtime=$((runtime/60))
+  echo "${FUNCNAME[0]}: Demo duration = $runtime minutes"
+}
+
+# Automate the installation
+function setup() {
+  # Installation: http://rancher.com/docs/rancher/v1.6/en/
+  # Install rancher server (master) at primary interface of host
+  # Account control is disabled (open access to API), and Default env created
+  ip=$(ip route get 1 | awk '{print $NF;exit}')
+  setup_master $ip
+  # Install rancher CLI tools (rancher, rancher-compose), register with master
+  # and setup CLI environment (e.g. API access/secret keys)
+  install_cli_tools $ip
+
+  # Add agent hosts per http://rancher.com/docs/rancher/v1.6/en/hosts/custom/
+  agents="$1"
+  for agent in $agents; do
+    setup_agent Default $agent
+  done
+}
+
+# Clean the installation
+function clean() {
+  delete_service nginx/lb
+  delete_stack nginx
+  delete_service dokuwiki/lb
+  delete_stack dokuwiki
+  agents="$1"
+  for agent in $agents; do
+    stop_agent $agent
+  done
+  sudo docker stop rancher
+  sudo docker rm -v rancher
+  sudo apt-get remove -y docker-ce
+}
+
+export WORK_DIR=$(pwd)
+case "$1" in
+  master)
+    ip=$(ip route get 1 | awk '{print $NF;exit}')
+    setup_master $ip
+    ;;
+  agents)
+    agents="$2"
+    for agent in $agents; do
+      setup_agent Default $agent
+    done
+    ;;
+  ceph)
+    # TODO Ceph support for rancher, e.g. re
+    # http://rancher.com/docs/rancher/latest/en/rancher-services/storage-service/
+    # https://github.com/rancher/rancher/issues/8722
+    # setup_ceph "$2" $3 $4 $5
+    ;;
+  demo)
+    demo "$2"
+    ;;
+  setup)
+    setup "$2"
+    ;;
+  all)
+    setup "$2"
+    demo "$2"
+    check_service nginx/lb
+    check_service dokuwiki/lb
+    check_service grafana/grafana
+    ;;
+  clean)
+    clean "$2"
+    ;;
+  *)
+    if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' $0; fi
+esac
diff --git a/tools/traffic.sh b/tools/traffic.sh
new file mode 100644 (file)
index 0000000..c020b6c
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: semi-random request generator for a web service
+#.
+#. How to use:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models
+#. $ bash models/tools/traffic.sh <url>
+#.   <url>: address of the web service
+
+echo "$0: $(date) Generate some traffic, somewhat randomly"
+ns="0 00 000"
+while true
+do
+  for n in $ns; do
+    sleep .$n$[ ( $RANDOM % 10 ) + 1 ]s
+    curl -s $1 > /dev/null
+  done
+done