Merge "Support recovering VM on the same host"
authorCarlos Goncalves <carlos.goncalves@neclab.eu>
Mon, 18 Jan 2016 08:19:37 +0000 (08:19 +0000)
committerGerrit Code Review <gerrit@172.30.200.206>
Mon, 18 Jan 2016 08:19:37 +0000 (08:19 +0000)
.gitignore
docs/platformoverview/doctor.rst [new file with mode: 0644]
docs/platformoverview/images/figure-p1.png [new file with mode: 0755]
docs/platformoverview/index.rst [new file with mode: 0644]
tests/consumer.py [new file with mode: 0644]
tests/inspector.py [new file with mode: 0644]
tests/monitor.py [new file with mode: 0644]
tests/run.sh [new file with mode: 0755]

index 2884629..33a0451 100644 (file)
@@ -1,5 +1,5 @@
 *~
 .*.sw?
-/build/
-/output/
+/docs_build/
+/docs_output/
 /releng/
diff --git a/docs/platformoverview/doctor.rst b/docs/platformoverview/doctor.rst
new file mode 100644 (file)
index 0000000..6ee59a9
--- /dev/null
@@ -0,0 +1,47 @@
+===============
+Doctor Platform
+===============
+
+https://wiki.opnfv.org/doctor
+
+Features
+========
+
+The Doctor platform, as of the Brahmaputra release, provides two features:
+
+* Immediate Notification
+* Consistent resource state awareness (Compute)
+
+These features enable high availability of Network Services on top of
+the virtualized infrastructure. Immediate notification allows VNF managers
+(VNFM) to process recovery actions promptly once a failure has occurred.
+Consistency of resource state is necessary to execute recovery
+actions properly in the VIM.
+
+Components
+==========
+
+Doctor platform, as of Brahmaputra release, consists of the following
+components:
+
+* OpenStack Compute (Nova)
+* OpenStack Telemetry (Ceilometer)
+* OpenStack Alarming (Aodh)
+* Doctor Inspector
+* Doctor Monitor
+
+.. note::
+    Doctor Inspector and Monitor are sample implementations for reference.
+
+You can see an overview of the Doctor platform and how components interact in
+:numref:`figure-p1`.
+
+.. figure:: images/figure-p1.png
+    :name: figure-p1
+    :width: 100%
+
+    Doctor platform and typical sequence (Brahmaputra)
+
+Detailed information on the Doctor architecture can be found in the Doctor
+requirements documentation:
+http://artifacts.opnfv.org/doctor/docs/requirements/05-implementation.html
diff --git a/docs/platformoverview/images/figure-p1.png b/docs/platformoverview/images/figure-p1.png
new file mode 100755 (executable)
index 0000000..e963d8b
Binary files /dev/null and b/docs/platformoverview/images/figure-p1.png differ
diff --git a/docs/platformoverview/index.rst b/docs/platformoverview/index.rst
new file mode 100644 (file)
index 0000000..cee06eb
--- /dev/null
@@ -0,0 +1,9 @@
+***************************
+Overview of Doctor Platform
+***************************
+
+.. toctree::
+   :numbered:
+   :maxdepth: 2
+
+   doctor.rst
diff --git a/tests/consumer.py b/tests/consumer.py
new file mode 100644 (file)
index 0000000..aa6e90e
--- /dev/null
@@ -0,0 +1,48 @@
+#
+# Copyright 2016 NEC Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+from flask import Flask
+from flask import request
+import json
+import os
+import time
+
+
+app = Flask(__name__)
+
+
+@app.route('/failure', methods=['POST'])
+def event_posted():
+    app.logger.debug('doctor consumer notified at %s' % time.time())
+    app.logger.debug('received data = %s' % request.data)
+    d = json.loads(request.data)
+    return "OK"
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description='Doctor Sample Monitor')
+    parser.add_argument('port', metavar='PORT', type=int, nargs='?',
+                        help='a port for inspectpr')
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    app.run(port=args.port, debug=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/inspector.py b/tests/inspector.py
new file mode 100644 (file)
index 0000000..2445c77
--- /dev/null
@@ -0,0 +1,76 @@
+#
+# Copyright 2016 NEC Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+from flask import Flask
+from flask import request
+import json
+import os
+
+import novaclient.client as novaclient
+
+
+class DoctorInspectorSample(object):
+
+    nova_api_version = 2.11
+
+    def __init__(self):
+        self.nova = novaclient.Client(self.nova_api_version,
+                                      os.environ['OS_USERNAME'],
+                                      os.environ['OS_PASSWORD'],
+                                      os.environ['OS_TENANT_NAME'],
+                                      os.environ['OS_AUTH_URL'],
+                                      connection_pool=True)
+        # check nova is available
+        self.nova.servers.list(detailed=False)
+
+    def disable_compute_host(self, hostname):
+        opts = {'all_tenants': True, 'host': hostname}
+        for server in self.nova.servers.list(detailed=False, search_opts=opts):
+            self.nova.servers.reset_state(server, 'error')
+        self.nova.services.force_down(hostname, 'nova-compute', True)
+
+
+app = Flask(__name__)
+inspector = DoctorInspectorSample()
+
+
+@app.route('/events', methods=['POST'])
+def event_posted():
+    app.logger.debug('event posted')
+    app.logger.debug('inspector = %s' % inspector)
+    app.logger.debug('received data = %s' % request.data)
+    d = json.loads(request.data)
+    hostname = d['hostname']
+    event_type = d['type']
+    if event_type == 'compute.host.down':
+        inspector.disable_compute_host(hostname)
+    return "OK"
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description='Doctor Sample Monitor')
+    parser.add_argument('port', metavar='PORT', type=int, nargs='?',
+                        help='a port for inspectpr')
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    app.run(port=args.port, debug=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/monitor.py b/tests/monitor.py
new file mode 100644 (file)
index 0000000..2a78cb6
--- /dev/null
@@ -0,0 +1,79 @@
+#
+# Copyright 2016 NEC Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import json
+import requests
+import socket
+import time
+
+
+# NOTE: icmp message with all zero data (checksum = 0xf7ff)
+#       see https://tools.ietf.org/html/rfc792
+ICMP_ECHO_MESSAGE = '\x08\x00\xf7\xff\x00\x00\x00\x00'
+
+
+class DoctorMonitorSample(object):
+
+    interval = 0.1  # second
+    timeout = 0.1  # second
+    event_type = "compute.host.down"
+
+    def __init__(self, args):
+        self.hostname = args.hostname
+        self.inspector = args.inspector
+        self.ip_addr = socket.gethostbyname(self.hostname)
+
+    def start_loop(self):
+        print "start ping to host %s" % self.hostname
+        sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
+                             socket.IPPROTO_ICMP)
+        sock.settimeout(self.timeout)
+        while True:
+            try:
+                sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
+                data = sock.recv(4096)
+            except socket.timeout:
+                print "doctor monitor detected at %s" % time.time()
+                self.report_error()
+                print "ping timeout, quit monitoring..."
+                return
+            time.sleep(self.interval)
+
+    def report_error(self):
+        payload = {"type": self.event_type, "hostname": self.hostname}
+        data = json.dumps(payload)
+        headers = {'content-type': 'application/json'}
+        requests.post(self.inspector, data=data, headers=headers)
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description='Doctor Sample Monitor')
+    parser.add_argument('hostname', metavar='HOSTNAME', type=str, nargs='?',
+                        help='a hostname to monitor connectivity')
+    parser.add_argument('inspector', metavar='INSPECTOR', type=str, nargs='?',
+                        help='inspector url to report error',
+                        default='http://127.0.0.1:12345/events')
+    return parser.parse_args()
+
+
+def main():
+    args = get_args()
+    monitor = DoctorMonitorSample(args)
+    monitor.start_loop()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/run.sh b/tests/run.sh
new file mode 100755 (executable)
index 0000000..31fec61
--- /dev/null
@@ -0,0 +1,148 @@
+#!/bin/bash -ex
+#
+# Copyright 2016 NEC Corporation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+#branch=$(git rev-parse --abbrev-ref HEAD)
+BRANCH=master
+
+IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
+IMAGE_NAME=cirros
+IMAGE_FILE="${IMAGE_NAME}.img"
+IMAGE_FORMAT=qcow2
+VM_NAME=doctor_vm1
+VM_FLAVOR=m1.tiny
+COMPUTE_HOST='s142'
+ALARM_NAME=doctor_alarm1
+INSPECTOR_PORT=12345
+CONSUMER_PORT=12346
+
+
+download_image() {
+    [ -e "$IMAGE_FILE" ] && return 0
+    wget "$IMAGE_URL" -o "$IMAGE_FILE"
+}
+
+register_image() {
+    glance image-list | grep -q " $IMAGE_NAME " && return 0
+    glance image-create --name "$IMAGE_NAME" \
+                        --visibility public \
+                        --disk-format "$IMAGE_FORMAT" \
+                        --container-format bare \
+                        --file "$IMAGE_FILE"
+}
+
+boot_vm() {
+    nova list | grep -q " $VM_NAME " && return 0
+    nova boot --flavor "$VM_FLAVOR" \
+              --image "$IMAGE_NAME" \
+              "$VM_NAME"
+    sleep 1
+}
+
+create_alarm() {
+    ceilometer alarm-list | grep -q " $ALARM_NAME " && return 0
+    vm_id=$(nova list | grep " $VM_NAME " | awk '{print $2}')
+    ceilometer alarm-event-create --name "$ALARM_NAME" \
+        --alarm-action "http://localhost:$CONSUMER_PORT/failure" \
+        --description "VM failure" \
+        --enabled True \
+        --repeat-actions False \
+        --severity "moderate" \
+        --event-type compute.instance.update \
+        -q "traits.state=string::error; traits.instance_id=string::$vm_id"
+}
+
+start_monitor() {
+    pgrep -f "python monitor.py" && return 0
+    sudo python monitor.py "$COMPUTE_HOST" "http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
+    MONITOR_PID=$!
+}
+
+stop_monitor() {
+    pgrep -f "python monitor.py" || return 0
+    sudo kill $(pgrep -f "python monitor.py")
+    cat monitor.log
+}
+
+start_inspector() {
+    pgrep -f "python inspector.py" && return 0
+    python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 &
+}
+
+stop_inspector() {
+    pgrep -f "python inspector.py" || return 0
+    kill $(pgrep -f "python inspector.py")
+    cat inspector.log
+}
+
+start_consumer() {
+    pgrep -f "python consumer.py" && return 0
+    python consumer.py "$CONSUMER_PORT" > consumer.log 2>&1 &
+}
+
+stop_consumer() {
+    pgrep -f "python consumer.py" || return 0
+    kill $(pgrep -f "python consumer.py")
+    cat consumer.log
+}
+
+wait_for_vm_launch() {
+    echo "waiting for vm launch..."
+    while true
+    do
+        state=$(nova list | grep " $VM_NAME " | awk '{print $6}')
+        [[ "$state" == "ACTIVE" ]] && return 0
+        sleep 1
+    done
+}
+
+# Placeholder for fault injection: for now it only prints the ssh command
+# that would take the compute host's eno1 link down; nothing is executed.
+inject_failure() {
+    #FIXME
+    echo ssh $COMPUTE_HOST "ip link set eno1 down"
+}
+
+# Print the delay between failure detection and consumer notification,
+# followed by "OK" when it is below 1 and "NG" otherwise.
+# Both timestamps are the 5th field of the matching lines in monitor.log
+# and consumer.log.
+calculate_notification_time() {
+    detect=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}')
+    notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}')
+    # NOTE(review): duration is assigned but not read inside this function.
+    duration=$(echo "$notified $detect" | awk '{print $1 - $2 }')
+    echo "$notified $detect" | \
+        awk '{d = $1 - $2; if (d < 1 ) print d " OK"; else print d " NG"}'
+}
+
+# TODO(r-mibu): Make sure env params are set properly for OpenStack clients
+# TODO(r-mibu): Make sure POD for doctor test is available in Pharos
+
+# The test is skipped for now: the script exits successfully here, so
+# nothing below this point is executed.
+echo "Note: doctor/tests/run.sh has been executed, "
+echo "      but skipping this test due to lack of available test env/deployment."
+exit 0
+
+# Intended scenario: prepare the image, start the sample services, boot the
+# VM with its alarm, inject a failure and measure the notification delay.
+download_image
+register_image
+
+start_monitor
+start_inspector
+start_consumer
+
+boot_vm
+create_alarm
+wait_for_vm_launch
+
+sleep 60
+inject_failure
+sleep 10
+
+calculate_notification_time
+
+echo "done"