Merge "Inspector design guideline"
authorGerald Kunzmann <kunzmann@docomolab-euro.com>
Tue, 13 Dec 2016 15:00:43 +0000 (15:00 +0000)
committerGerrit Code Review <gerrit@opnfv.org>
Tue, 13 Dec 2016 15:00:43 +0000 (15:00 +0000)
12 files changed:
UPSTREAM [new file with mode: 0644]
docs/design/performance-profiler.rst [new file with mode: 0644]
docs/requirements/02-use_cases.rst
docs/requirements/03-architecture.rst
docs/requirements/images/figure1.png [changed mode: 0755->0644]
docs/requirements/images/figure2.png [changed mode: 0755->0644]
tests/consumer.py
tests/functions-common [new file with mode: 0644]
tests/inspector.py
tests/logger.py [new file with mode: 0644]
tests/monitor.py
tests/run.sh

diff --git a/UPSTREAM b/UPSTREAM
new file mode 100644 (file)
index 0000000..d881040
--- /dev/null
+++ b/UPSTREAM
@@ -0,0 +1,39 @@
+# Upstream contributions; Bitergia will crawl this file and extract the relevant information.
+# system is one of: Gerrit, Bugzilla, Launchpad (add more as needed)
+---
+-
+  url: https://blueprints.launchpad.net/ceilometer/+spec/event-alarm-evaluator
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/nova/+spec/mark-host-down
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/python-novaclient/+spec/support-force-down-service
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/nova/+spec/get-valid-server-state
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/nova/+spec/servers-by-host-status
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/nova/+spec/maintenance-reason-to-server
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/nova/+spec/service-status-notification
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/congress/+spec/push-type-datasource-driver
+  system: Launchpad
+#-
+#  url: https://review.openstack.org/#/c/314915/
+#  system: Gerrit
+-
+  url: https://blueprints.launchpad.net/cinder/+spec/mark-services-down
+  system: Launchpad
+-
+  url: https://blueprints.launchpad.net/python-cinderclient/+spec/mark-service-down-cli
+  system: Launchpad
+#-
+#  url: https://bugs.launchpad.net/neutron/+bug/1513144
+#  system: Launchpad-bug
diff --git a/docs/design/performance-profiler.rst b/docs/design/performance-profiler.rst
new file mode 100644 (file)
index 0000000..f834a91
--- /dev/null
@@ -0,0 +1,118 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+====================
+Performance Profiler
+====================
+
+https://goo.gl/98Osig
+
+This blueprint proposes to create a performance profiler for Doctor scenarios.
+
+Problem Description
+===================
+
+In the verification job for notification time, we have encountered several
+performance issues, such as:
+
+1. In an environment deployed by APEX, the notification time meets the
+   criteria, while in one deployed by Fuel the performance is much poorer.
+2. Significant performance degradation was observed when the total number
+   of VMs was increased.
+
+It takes time to dig through the logs and analyse the cause. People have to
+collect the timestamp at each checkpoint manually to find the bottleneck. A
+performance profiler will automate this process.
+
+Proposed Change
+===============
+
+The current Doctor scenario covers the inspector and notifier in the whole fault
+management cycle::
+
+  start                                          end
+    +       +         +        +       +          +
+    |       |         |        |       |          |
+    |monitor|inspector|notifier|manager|controller|
+    +------>+         |        |       |          |
+  occurred  +-------->+        |       |          |
+    |     detected    +------->+       |          |
+    |       |     identified   +-------+          |
+    |       |               notified   +--------->+
+    |       |                  |    processed  resolved
+    |       |                  |                  |
+    |       +<-----doctor----->+                  |
+    |                                             |
+    |                                             |
+    +<---------------fault management------------>+
+
+The notification time can be split into several parts and visualized as a
+timeline::
+
+  start                                         end
+    0----5---10---15---20---25---30---35---40---45--> (x 10ms)
+    +    +   +   +   +    +      +   +   +   +   +
+  0-hostdown |   |   |    |      |   |   |   |   |
+    +--->+   |   |   |    |      |   |   |   |   |
+    |  1-raw failure |    |      |   |   |   |   |
+    |    +-->+   |   |    |      |   |   |   |   |
+    |    | 2-found affected      |   |   |   |   |
+    |    |   +-->+   |    |      |   |   |   |   |
+    |    |     3-marked host down|   |   |   |   |
+    |    |       +-->+    |      |   |   |   |   |
+    |    |         4-set VM error|   |   |   |   |
+    |    |           +--->+      |   |   |   |   |
+    |    |           |  5-notified VM error  |   |
+    |    |           |    +----->|   |   |   |   |
+    |    |           |    |    6-transformed event
+    |    |           |    |      +-->+   |   |   |
+    |    |           |    |      | 7-evaluated event
+    |    |           |    |      |   +-->+   |   |
+    |    |           |    |      |     8-fired alarm
+    |    |           |    |      |       +-->+   |
+    |    |           |    |      |         9-received alarm
+    |    |           |    |      |           +-->+
+  sample | sample    |    |      |           |10-handled alarm
+  monitor| inspector |nova| c/m  |    aodh   |
+    |                                        |
+    +<-----------------doctor--------------->+
+
+Note: c/m = ceilometer
+
+And a table of components sorted by time cost, from most to least:
+
++----------+---------+----------+
+|Component |Time Cost|Percentage|
++==========+=========+==========+
+|inspector |160ms    | 40%      |
++----------+---------+----------+
+|aodh      |110ms    | 30%      |
++----------+---------+----------+
+|monitor   |50ms     | 14%      |
++----------+---------+----------+
+|...       |         |          |
++----------+---------+----------+
+|...       |         |          |
++----------+---------+----------+
+
+Note: the data in the table is for demonstration only, not an actual measurement.
+
+Timestamps can be collected from various sources:
+
+1. log files
+2. trace point in code
+
+The performance profiler will be integrated into the verification job to
+provide detailed results of the test. It can also be deployed independently
+to diagnose performance issues in a specific environment.
+
+Working Items
+=============
+
+1. PoC with limited checkpoints
+2. Integration with verification job
+3. Collect timestamps at all checkpoints
+4. Display the profiling result in the console
+5. Report the profiling result to the test database
+6. Independent package which can be installed into a specified environment
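The log-based collection path above could be sketched roughly as follows. This is an illustration only: the checkpoint names, the "<checkpoint> at <unix-time>" log-line format, and the log file name are all hypothetical, not the actual Doctor output::

    # Parse checkpoint timestamps from a log file and print the
    # per-component cost, as in the table above.
    import re

    CHECKPOINTS = ['hostdown', 'raw failure', 'marked host down',
                   'fired alarm', 'received alarm']  # subset of the 10 above

    def collect_timestamps(log_file):
        stamps = {}
        pattern = re.compile(r'(%s) at ([\d.]+)' % '|'.join(CHECKPOINTS))
        with open(log_file) as f:
            for line in f:
                m = pattern.search(line)
                if m:
                    stamps[m.group(1)] = float(m.group(2))
        return stamps

    def profile(stamps):
        # report each interval between consecutive checkpoints
        ordered = [c for c in CHECKPOINTS if c in stamps]
        total = stamps[ordered[-1]] - stamps[ordered[0]]
        for prev, cur in zip(ordered, ordered[1:]):
            cost = stamps[cur] - stamps[prev]
            print('%-20s %6.0fms %5.1f%%'
                  % (cur, cost * 1000, 100 * cost / total))

    if __name__ == '__main__':
        profile(collect_timestamps('doctor_profile.log'))  # hypothetical file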
index 424a3c6..0a1f641 100644 (file)
@@ -136,7 +136,7 @@ the same as in the "Fault management using ACT-STBY configuration" use case,
 except in this case, the Consumer of a VM/VNF switches to STBY configuration
 based on a predicted fault, rather than an occurred fault.
 
-NVFI Maintenance
+NFVI Maintenance
 ----------------
 
 VM Retirement
index 8ff5dac..9f620e6 100644 (file)
@@ -217,7 +217,7 @@ restart of the VM, migration/evacuation of the VM, or no action.
 High level northbound interface specification
 ---------------------------------------------
 
-Fault management
+Fault Management
 ^^^^^^^^^^^^^^^^
 
 This interface allows the Consumer to subscribe to fault notification from the
@@ -321,7 +321,7 @@ An example of a high level message flow to cover the failed NFVI maintenance cas
 shown in :numref:`figure5c`.
 It consists of the following steps:
 
-5. The Consumer C3 switches to standby configuration (STDBY).
+5. The Consumer C3 switches to standby configuration (STBY).
 6. Instructions from Consumers C2/C3 are shared to VIM requesting certain actions to be performed (steps 6a, 6b).
    The VIM executes the requested actions and sends back a NACK to consumer C2 (step 6d) as the
    migration of the virtual resource(s) is not completed by the given timeout.
old mode 100755 (executable)
new mode 100644 (file)
index dacf0dd..267dddd
Binary files a/docs/requirements/images/figure1.png and b/docs/requirements/images/figure1.png differ
old mode 100755 (executable)
new mode 100644 (file)
index 3c8a2bf..9a3b166
Binary files a/docs/requirements/images/figure2.png and b/docs/requirements/images/figure2.png differ
index 9b3230f..3c012b4 100644 (file)
@@ -11,17 +11,20 @@ import argparse
 from flask import Flask
 from flask import request
 import json
+import logger as doctor_log
 import os
 import time
 
+LOG = doctor_log.Logger('doctor_consumer').getLogger()
+
 
 app = Flask(__name__)
 
 
 @app.route('/failure', methods=['POST'])
 def event_posted():
-    app.logger.debug('doctor consumer notified at %s' % time.time())
-    app.logger.debug('received data = %s' % request.data)
+    LOG.info('doctor consumer notified at %s' % time.time())
+    LOG.info('received data = %s' % request.data)
     d = json.loads(request.data)
     return "OK"
 
@@ -35,7 +38,7 @@ def get_args():
 
 def main():
     args = get_args()
-    app.run(host="0.0.0.0", port=args.port, debug=True)
+    app.run(host="0.0.0.0", port=args.port)
 
 
 if __name__ == '__main__':
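The consumer's /failure handler above just logs the request body and returns "OK", so it can be exercised directly with a POST like this sketch (the port and the payload fields are illustrative assumptions; run.sh decides the real port)::

    import json
    import requests

    payload = {'alarm_name': 'doctor_alarm', 'current': 'alarm'}  # illustrative
    r = requests.post('http://127.0.0.1:12346/failure',
                      data=json.dumps(payload),
                      headers={'Content-Type': 'application/json'})
    print(r.status_code, r.text)  # expect: 200 OK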
diff --git a/tests/functions-common b/tests/functions-common
new file mode 100644 (file)
index 0000000..db2565a
--- /dev/null
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# Test if the named environment variable is set and not zero length
+# is_set env-var
+function is_set {
+    local var=\$"$1"
+    eval "[ -n \"$var\" ]"
+}
+
+# Prints backtrace info
+# filename:lineno:function
+# backtrace level
+function backtrace {
+    local level=$1
+    local deep
+    deep=$((${#BASH_SOURCE[@]} - 1))
+    echo "[Call Trace]"
+    while [ $level -le $deep ]; do
+        echo "${BASH_SOURCE[$deep]}:${BASH_LINENO[$deep-1]}:${FUNCNAME[$deep-1]}"
+        deep=$((deep - 1))
+    done
+}
+
+# Prints line number and "message" in error format
+# err $LINENO "message"
+function err {
+    local exitcode=$?
+    local xtrace
+    xtrace=$(set +o | grep xtrace)
+    set +o xtrace
+    local msg="[ERROR] ${BASH_SOURCE[2]}:$1 $2"
+    echo $msg 1>&2;
+    if [[ -n ${LOGDIR} ]]; then
+        echo $msg >> "${LOGDIR}/error.log"
+    fi
+    $xtrace
+    return $exitcode
+}
+
+# Prints line number and "message" then exits
+# die $LINENO "message"
+function die {
+    local exitcode=$?
+    set +o xtrace
+    local line=$1; shift
+    if [ $exitcode == 0 ]; then
+        exitcode=1
+    fi
+    backtrace 2
+    err $line "$*"
+    # Give buffers a second to flush
+    sleep 1
+    exit $exitcode
+}
+
+# Checks an environment variable is not set or has length 0 OR if the
+# exit code is non-zero and prints "message" and exits
+# NOTE: env-var is the variable name without a '$'
+# die_if_not_set $LINENO env-var "message"
+function die_if_not_set {
+    local exitcode=$?
+    local xtrace
+    xtrace=$(set +o | grep xtrace)
+    set +o xtrace
+    local line=$1; shift
+    local evar=$1; shift
+    if ! is_set $evar || [ $exitcode != 0 ]; then
+        die $line "$*"
+    fi
+    $xtrace
+}
+
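These helpers replace the repeated "echo ERROR; exit 1" pattern in run.sh below. A usage sketch, in the same shell dialect (the variable and messages are examples only)::

    #!/bin/bash
    source functions-common

    COMPUTE_IP=""  # example variable, normally set by the environment
    # prints a backtrace plus "[ERROR] ...: no compute ip" and exits,
    # because COMPUTE_IP is empty
    die_if_not_set $LINENO COMPUTE_IP "no compute ip"

    ping -c 1 "$COMPUTE_IP" || die $LINENO "cannot ping compute host"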
index 6261415..129a386 100644 (file)
@@ -12,6 +12,7 @@ import collections
 from flask import Flask
 from flask import request
 import json
+import logger as doctor_log
 import os
 import time
 
@@ -19,6 +20,8 @@ import novaclient.client as novaclient
 
 import nova_force_down
 
+LOG = doctor_log.Logger('doctor_inspector').getLogger()
+
 
 class DoctorInspectorSample(object):
 
@@ -44,9 +47,9 @@ class DoctorInspectorSample(object):
             try:
                 host=server.__dict__.get('OS-EXT-SRV-ATTR:host')
                 self.servers[host].append(server)
-                app.logger.debug('get hostname=%s from server=%s' % (host, server))
+                LOG.debug('get hostname=%s from server=%s' % (host, server))
             except Exception as e:
-                app.logger.debug('can not get hostname from server=%s' % server)
+                LOG.error('cannot get hostname from server=%s' % server)
 
     def disable_compute_host(self, hostname):
         for server in self.servers[hostname]:
@@ -63,15 +66,14 @@ class DoctorInspectorSample(object):
 
 
 app = Flask(__name__)
-app.debug = True
 inspector = DoctorInspectorSample()
 
 
 @app.route('/events', methods=['POST'])
 def event_posted():
-    app.logger.debug('event posted at %s' % time.time())
-    app.logger.debug('inspector = %s' % inspector)
-    app.logger.debug('received data = %s' % request.data)
+    LOG.info('event posted at %s' % time.time())
+    LOG.info('inspector = %s' % inspector)
+    LOG.info('received data = %s' % request.data)
     d = json.loads(request.data)
     hostname = d['hostname']
     event_type = d['type']
@@ -91,5 +93,6 @@ def main():
     args = get_args()
     app.run(port=args.port)
 
+
 if __name__ == '__main__':
     main()
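A minimal reproduction of what monitor.py posts to the inspector's /events endpoint; the handler above reads d['hostname'] and d['type'] from the JSON body (the port and hostname values here are illustrative assumptions)::

    import json
    import requests

    event = {'hostname': 'overcloud-novacompute-0',  # illustrative
             'type': 'compute.host.down'}
    r = requests.post('http://127.0.0.1:12345/events',
                      data=json.dumps(event))
    print(r.status_code)  # expect: 200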
diff --git a/tests/logger.py b/tests/logger.py
new file mode 100644 (file)
index 0000000..a4f3323
--- /dev/null
@@ -0,0 +1,47 @@
+##############################################################################
+# Copyright (c) 2016 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+# Usage:
+#  import doctor_logger
+#  logger = doctor_logger.Logger("script_name").getLogger()
+#  logger.info("message to be shown with - INFO - ")
+#  logger.debug("message to be shown with - DEBUG -")
+
+import logging
+import os
+
+
+class Logger:
+    def __init__(self, logger_name):
+
+        CI_DEBUG = os.getenv('CI_DEBUG')
+
+        self.logger = logging.getLogger(logger_name)
+        self.logger.propagate = 0
+        self.logger.setLevel(logging.DEBUG)
+
+        formatter = logging.Formatter('%(asctime)s %(filename)s %(lineno)d '
+                                      '%(levelname)-6s %(message)s')
+
+        ch = logging.StreamHandler()
+        ch.setFormatter(formatter)
+        if CI_DEBUG is not None and CI_DEBUG.lower() == "true":
+            ch.setLevel(logging.DEBUG)
+        else:
+            ch.setLevel(logging.INFO)
+        self.logger.addHandler(ch)
+
+        file_handler = logging.FileHandler('%s.log' % logger_name)
+        file_handler.setFormatter(formatter)
+        file_handler.setLevel(logging.DEBUG)
+        self.logger.addHandler(file_handler)
+
+
+    def getLogger(self):
+        return self.logger
+
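In short, CI_DEBUG only controls console verbosity: the stream handler drops to DEBUG when CI_DEBUG=true, while the <logger_name>.log file handler always records DEBUG. A quick sketch (the logger name is illustrative; CI_DEBUG must be set before the Logger is constructed, since it is read in __init__)::

    import os
    os.environ['CI_DEBUG'] = 'true'

    import logger as doctor_log

    LOG = doctor_log.Logger('demo').getLogger()
    LOG.debug('console: only when CI_DEBUG=true; demo.log: always')
    LOG.info('console and demo.log: always')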
index caf4c32..26c911d 100644 (file)
@@ -10,6 +10,7 @@
 import argparse
 from datetime import datetime
 import json
+import logger as doctor_log
 import os
 import requests
 import socket
@@ -26,6 +27,8 @@ ICMP_ECHO_MESSAGE = '\x08\x00\xf7\xff\x00\x00\x00\x00'
 
 SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress']
 
+LOG = doctor_log.Logger('doctor_monitor').getLogger()
+
 class DoctorMonitorSample(object):
 
     interval = 0.1  # second
@@ -58,8 +61,8 @@ class DoctorMonitorSample(object):
                                   (congress_endpoint, doctor_ds['id']))
 
     def start_loop(self):
-        print "start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
-                                                       'i': self.ip_addr}
+        LOG.debug("start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
+                                                       'i': self.ip_addr})
         sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
                              socket.IPPROTO_ICMP)
         sock.settimeout(self.timeout)
@@ -68,9 +71,9 @@ class DoctorMonitorSample(object):
                 sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
                 data = sock.recv(4096)
             except socket.timeout:
-                print "doctor monitor detected at %s" % time.time()
+                LOG.info("doctor monitor detected at %s" % time.time())
                 self.report_error()
-                print "ping timeout, quit monitoring..."
+                LOG.info("ping timeout, quit monitoring...")
                 return
             time.sleep(self.interval)
 
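A side note on the ICMP_ECHO_MESSAGE used by the monitor: its hard-coded checksum word 0xf7ff is correct by construction. Per RFC 792, the ICMP checksum is the one's complement of the 16-bit one's complement sum of the message (with the checksum field zeroed), and here the only non-zero word is 0x0800 (type 8 "echo request", code 0)::

    # Verify the checksum baked into '\x08\x00\xf7\xff\x00\x00\x00\x00'
    checksum = ~0x0800 & 0xffff
    assert checksum == 0xf7ff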
index 99e8fef..b6bfc0b 100755 (executable)
@@ -32,6 +32,7 @@ INSTALLER_IP=${INSTALLER_IP:-none}
 
 SUPPORTED_INSPECTOR_TYPES="sample congress"
 INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample}
+TOP_DIR=$(cd $(dirname "$0") && pwd)
 
 ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
 as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW
@@ -61,10 +62,7 @@ get_installer_ip() {
     fi
 
     if [[ "$INSTALLER_TYPE" != "local" ]] ; then
-        if [[ -z "$INSTALLER_IP" ]] ; then
-            echo "ERROR: no installer ip"
-            exit 1
-        fi
+        die_if_not_set $LINENO INSTALLER_IP "No installer IP"
     fi
 }
 
@@ -190,10 +188,7 @@ get_compute_host_info() {
     COMPUTE_HOST=$(openstack $as_doctor_user server show $VM_NAME |
                    grep "OS-EXT-SRV-ATTR:host" | awk '{ print $4 }')
     compute_host_in_undercloud=${COMPUTE_HOST%%.*}
-    if [[ -z "$COMPUTE_HOST" ]] ; then
-        echo "ERROR: failed to get compute hostname"
-        exit 1
-    fi
+    die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname"
 
     if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
         COMPUTE_USER=${COMPUTE_USER:-heat-admin}
@@ -211,25 +206,20 @@ get_compute_host_info() {
         COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
     fi
 
-    if [[ -z "$COMPUTE_IP" ]]; then
-        echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
-        exit 1
-    fi
+    die_if_not_set $LINENO COMPUTE_IP "Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
     echo "COMPUTE_HOST=$COMPUTE_HOST"
     echo "COMPUTE_IP=$COMPUTE_IP"
 
     # verify connectivity to target compute host
     ping -c 1 "$COMPUTE_IP"
     if [[ $? -ne 0 ]] ; then
-        echo "ERROR: can not ping to computer host"
-        exit 1
+        die $LINENO "Can not ping to computer host"
     fi
 
     # verify ssh to target compute host
     ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit'
     if [[ $? -ne 0 ]] ; then
-        echo "ERROR: can not ssh to computer host"
-        exit 1
+        die $LINENO "Can not ssh to computer host"
     fi
 }
 
@@ -246,10 +236,7 @@ get_consumer_ip() {
     fi
     echo "CONSUMER_IP=$CONSUMER_IP"
 
-    if [[ -z "$CONSUMER_IP" ]]; then
-        echo "ERROR: Could not get CONSUMER_IP."
-        exit 1
-    fi
+    die_if_not_set $LINENO CONSUMER_IP "Could not get CONSUMER_IP."
 }
 
 download_image() {
@@ -312,12 +299,6 @@ create_alarm() {
         -q "traits.state=string::error; traits.instance_id=string::$vm_id"
 }
 
-print_log() {
-    log_file=$1
-    echo "$log_file:"
-    sed -e 's/^/    /' "$log_file"
-}
-
 start_monitor() {
     pgrep -f "python monitor.py" && return 0
     sudo -E python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "$INSPECTOR_TYPE" \
@@ -327,7 +308,6 @@ start_monitor() {
 stop_monitor() {
     pgrep -f "python monitor.py" || return 0
     sudo kill $(pgrep -f "python monitor.py")
-    print_log monitor.log
 }
 
 congress_add_rule() {
@@ -376,10 +356,11 @@ start_inspector() {
         nova_api_min_version="2.11"
         nova_api_version=$(openstack congress datasource list | \
                            grep nova | grep -Po "(?<='api_version': ')[^']*")
-        [[ -z $nova_api_version ]] && nova_api_version="2.0"
+        if ! is_set nova_api_version; then
+            nova_api_version="2.0"
+        fi
         if [[ "$nova_api_version" < "$nova_api_min_version" ]]; then
-            echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)"
-            exit 1
+            die $LINENO "Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)"
         fi
         openstack congress driver list | grep -q " doctor "
         openstack congress datasource list | grep -q " doctor " || {
@@ -393,7 +374,6 @@ stop_inspector() {
     if [[ "$INSPECTOR_TYPE" == "sample" ]] ; then
         pgrep -f "python inspector.py" || return 0
         kill $(pgrep -f "python inspector.py")
-        print_log inspector.log
     elif [[ "$INSPECTOR_TYPE" == "congress" ]] ; then
         congress_del_rule host_force_down classification
         congress_del_rule error_vm_states classification
@@ -420,10 +400,7 @@ start_consumer() {
                             "fuel node | grep controller | cut -d '|' -f 5|xargs")
         fi
 
-        if [[ -z "$CONTROLLER_IPS" ]]; then
-            echo "ERROR: Could not get CONTROLLER_IPS."
-            exit 1
-        fi
+        die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS."
         for ip in $CONTROLLER_IPS
         do
             forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT"
@@ -436,7 +413,6 @@ start_consumer() {
 stop_consumer() {
     pgrep -f "python consumer.py" || return 0
     kill $(pgrep -f "python consumer.py")
-    print_log consumer.log
 
     # NOTE(r-mibu): terminate tunnels to the controller nodes
     if [[ "$INSTALLER_TYPE" != "local" ]] ; then
@@ -445,7 +421,6 @@ stop_consumer() {
             forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT"
             tunnel_command="sudo ssh $ssh_opts_cpu $COMPUTE_USER@$ip $forward_rule sleep 600"
             kill $(pgrep -f "$tunnel_command")
-            print_log "ssh_tunnel.${ip}.log"
         done
     fi
 }
@@ -463,12 +438,13 @@ wait_for_vm_launch() {
             sleep 5
             return 0
         fi
-        [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
+        if [[ "$state" == "ERROR" ]]; then
+            die $LINENO "vm state is ERROR"
+        fi
         count=$(($count+1))
         sleep 1
     done
-    echo "ERROR: time out while waiting for vm launch"
-    exit 1
+    die $LINENO "Time out while waiting for VM launch"
 }
 
 inject_failure() {
@@ -489,11 +465,10 @@ END_TXT
 }
 
 calculate_notification_time() {
-    detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}')
-    notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}')
+    detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $10}')
+    notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $10}')
     if ! grep -q "doctor consumer notified at" consumer.log ; then
-        echo "ERROR: consumer hasn't received fault notification."
-        exit 1
+        die $LINENO "Consumer hasn't received fault notification."
     fi
     echo "$notified $detected" | \
         awk '{
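The switch from awk's $5 to $10 in calculate_notification_time tracks the new logger format: '%(asctime)s %(filename)s %(lineno)d %(levelname)-6s %(message)s' prepends five whitespace-separated fields (asctime alone contributes two), so the timestamp in "doctor monitor detected at <t>" now lands in field 10. A quick check against an illustrative log line::

    # awk's $10 is the 10th whitespace-separated field (awk is 1-indexed)
    line = ('2016-12-13 15:00:43,123 monitor.py 74 INFO   '
            'doctor monitor detected at 1481641243.12')
    assert line.split()[9] == '1481641243.12'  # == awk $10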
@@ -509,14 +484,11 @@ check_host_status() {
     host_status_line=$(openstack $as_doctor_user --os-compute-api-version 2.16 \
                        server show $VM_NAME | grep "host_status")
     host_status=$(echo $host_status_line | awk '{print $4}')
-    if [ -z "$host_status" ] ; then
-        echo "ERROR: host_status not reported by: nova show $VM_NAME"
-        exit 1
-    elif [[ "$expected_state" =~ "$host_status" ]] ; then
+    die_if_not_set $LINENO host_status "host_status not reported by: nova show $VM_NAME"
+    if [[ "$expected_state" =~ "$host_status" ]] ; then
         echo "$VM_NAME showing host_status: $host_status"
     else
-        echo "ERROR: host_status:$host_status not equal to expected_state: $expected_state"
-        exit 1
+        die $LINENO "host_status:$host_status not equal to expected_state: $expected_state"
     fi
 }
 
@@ -532,7 +504,6 @@ cleanup() {
     sleep 240
     check_host_status "UP"
     scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
-    print_log disable_network.log
 
     openstack $as_doctor_user server list | grep -q " $VM_NAME " && openstack $as_doctor_user server delete "$VM_NAME"
     sleep 1
@@ -560,6 +531,8 @@ echo "Note: doctor/tests/run.sh has been executed."
 
 trap cleanup EXIT
 
+source $TOP_DIR/functions-common
+
 echo "preparing test env..."
 get_installer_ip
 prepare_ssh_to_cloud