From: Gerald Kunzmann Date: Tue, 13 Dec 2016 15:00:43 +0000 (+0000) Subject: Merge "Inspector design guideline" X-Git-Tag: danube.1.RC1~37 X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=commitdiff_plain;h=6d0620d15a3391d4a0a9f2bec9de143ef68734d2;hp=36c3cacedfdae3e4b4f04e9bc021a20f7a6052f0;p=doctor.git Merge "Inspector design guideline" --- diff --git a/UPSTREAM b/UPSTREAM new file mode 100644 index 00000000..d8810404 --- /dev/null +++ b/UPSTREAM @@ -0,0 +1,39 @@ +# Upstream contributions, bitergia will crawl this and extract the relevant information +# system is one of Gerrit, Bugzilla, Launchpad (insert more) +--- +- + url: https://blueprints.launchpad.net/ceilometer/+spec/event-alarm-evaluator + system: Launchpad +- + url: https://blueprints.launchpad.net/nova/+spec/mark-host-down + system: Launchpad +- + url: https://blueprints.launchpad.net/python-novaclient/+spec/support-force-down-service + system: Launchpad +- + url: https://blueprints.launchpad.net/nova/+spec/get-valid-server-state + system: Launchpad +- + url: https://blueprints.launchpad.net/nova/+spec/servers-by-host-status + system: Launchpad +- + url: https://blueprints.launchpad.net/nova/+spec/maintenance-reason-to-server + system: Launchpad +- + url: https://blueprints.launchpad.net/nova/+spec/service-status-notification + system: Launchpad +- + url: https://blueprints.launchpad.net/congress/+spec/push-type-datasource-driver + system: Launchpad +#- +# url: https://review.openstack.org/#/c/314915/ +# system: Gerrit +- + url: https://blueprints.launchpad.net/cinder/+spec/mark-services-down + system: Launchpad +- + url: https://blueprints.launchpad.net/python-cinderclient/+spec/mark-service-down-cli + system: Launchpad +#- +# url: https://bugs.launchpad.net/neutron/+bug/1513144 +# system: Launchpad-bug diff --git a/docs/design/performance-profiler.rst b/docs/design/performance-profiler.rst new file mode 100644 index 00000000..f834a915 --- /dev/null +++ b/docs/design/performance-profiler.rst @@ -0,0 
+1,118 @@
+.. This work is licensed under a Creative Commons Attribution 4.0 International License.
+.. http://creativecommons.org/licenses/by/4.0
+
+
+====================
+Performance Profiler
+====================
+
+https://goo.gl/98Osig
+
+This blueprint proposes to create a performance profiler for doctor scenarios.
+
+Problem Description
+===================
+
+In the verification job for notification time, we have encountered some
+performance issues, such as:
+
+1. In an environment deployed by Apex, the criteria are met, while in one
+deployed by Fuel the performance is much poorer.
+2. Significant performance degradation was spotted when we increased the
+total number of VMs.
+
+It takes time to dig through the logs and analyse the cause. People have to
+collect timestamps at each checkpoint manually to find out the bottleneck. A
+performance profiler will make this process automatic.
+
+Proposed Change
+===============
+
+The current Doctor scenario covers the inspector and notifier in the whole
+fault management cycle::
+
+  start                                          end
+    +       +         +        +       +          +
+    |       |         |        |       |          |
+    |monitor|inspector|notifier|manager|controller|
+    +------>+         |        |       |          |
+  occurred  +-------->+        |       |          |
+    |       detected  +------->+       |          |
+    |       |       identified +-------+          |
+    |       |         notified         +--------->+
+    |       |         |        processed  resolved
+    |       |                  |                  |
+    |       +<-----doctor----->+                  |
+    |                                             |
+    |                                             |
+    +<---------------fault management------------>+
+
+The notification time can be split into several parts and visualized as a
+timeline::
+
+  start                                      end
+  0----5---10---15---20---25---30---35---40---45-->  (x 10ms)
+  +    +   +   +   +    +      +   +   +   +   +
+  0-hostdown   |   |    |      |   |   |   |   |
+  +--->+   |   |   |    |      |   |   |   |   |
+  |    1-raw failure    |      |   |   |   |   |
+  |    +-->+   |   |    |      |   |   |   |   |
+  |    |   2-found affected    |   |   |   |   |
+  |    |   +-->+   |    |      |   |   |   |   |
+  |    |   3-marked host down  |   |   |   |   |
+  |    |       +-->+    |      |   |   |   |   |
+  |    |       4-set VM error  |   |   |   |   |
+  |    |           +--->+      |   |   |   |   |
+  |    |           |    5-notified VM error|   |
+  |    |           |    +----->|   |   |   |   |
+  |    |           |    |      6-transformed event
+  |    |           |    |      +-->+   |   |   |
+  |    |           |    |      |   7-evaluated event
+  |    |           |    |      |   +-->+   |   |
+  |    |           |    |      |       8-fired alarm
+  |    |           |    |      |       +-->+   |
+  |    |           |    |      |           9-received alarm
+  |    |           |    |      |           +-->+
+ sample|   sample    |    |      |          |10-handled alarm
+ monitor|  inspector |nova|  c/m |   aodh   |
+       |                                        |
+       +<-----------------doctor--------------->+
+
+Note: c/m = ceilometer
+
+And a table of components sorted by time cost, from most to least:
+
++----------+---------+----------+
+|Component |Time Cost|Percentage|
++==========+=========+==========+
+|inspector |160ms    |   40%    |
++----------+---------+----------+
+|aodh      |110ms    |   30%    |
++----------+---------+----------+
+|monitor   |50ms     |   14%    |
++----------+---------+----------+
+|...       |         |          |
++----------+---------+----------+
+|...       |         |          |
++----------+---------+----------+
+
+Note: data in the table is for demonstration only, not an actual measurement.
+
+Timestamps can be collected from various sources:
+
+1. log files
+2. trace points in code
+
+The performance profiler will be integrated into the verification job to
+provide detailed results of the test. It can also be deployed independently
+to diagnose performance issues in a specified environment.
+
+Working Items
+=============
+
+1. PoC with limited checkpoints
+2. Integration with the verification job
+3. Collect timestamps at all checkpoints
+4. Display the profiling result in the console
+5. Report the profiling result to the test database
+6. Independent package which can be installed to a specified environment
diff --git a/docs/requirements/02-use_cases.rst b/docs/requirements/02-use_cases.rst
index 424a3c6e..0a1f6413 100644
--- a/docs/requirements/02-use_cases.rst
+++ b/docs/requirements/02-use_cases.rst
@@ -136,7 +136,7 @@ the same as in the "Fault management using ACT-STBY configuration" use case,
 except in this case, the Consumer of a VM/VNF switches to STBY configuration
 based on a predicted fault, rather than an occurred fault.
 
-NVFI Maintenance +NFVI Maintenance ---------------- VM Retirement diff --git a/docs/requirements/03-architecture.rst b/docs/requirements/03-architecture.rst index 8ff5dacf..9f620e68 100644 --- a/docs/requirements/03-architecture.rst +++ b/docs/requirements/03-architecture.rst @@ -217,7 +217,7 @@ restart of the VM, migration/evacuation of the VM, or no action. High level northbound interface specification --------------------------------------------- -Fault management +Fault Management ^^^^^^^^^^^^^^^^ This interface allows the Consumer to subscribe to fault notification from the @@ -321,7 +321,7 @@ An example of a high level message flow to cover the failed NFVI maintenance cas shown in :numref:`figure5c`. It consists of the following steps: -5. The Consumer C3 switches to standby configuration (STDBY). +5. The Consumer C3 switches to standby configuration (STBY). 6. Instructions from Consumers C2/C3 are shared to VIM requesting certain actions to be performed (steps 6a, 6b). The VIM executes the requested actions and sends back a NACK to consumer C2 (step 6d) as the migration of the virtual resource(s) is not completed by the given timeout. 
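The profiler blueprint above boils a test run down to a list of (checkpoint, timestamp) pairs and a per-stage cost table sorted from most to least expensive. A minimal sketch of that computation, using hypothetical checkpoint names and timings rather than Doctor's real log format (`profile` and the sample values are illustrative only, not part of the repo):

```python
# Minimal sketch of the proposed profiler idea: take (checkpoint, timestamp)
# pairs -- in practice grepped from log files or emitted by trace points --
# and report the per-stage time cost, most expensive stage first.
# All names and numbers below are hypothetical, not actual measurements.

def profile(checkpoints):
    """checkpoints: list of (name, timestamp) pairs ordered by time.

    Returns [(stage_name, (cost_seconds, percentage)), ...] sorted by cost,
    mirroring the component table in the blueprint."""
    total = checkpoints[-1][1] - checkpoints[0][1]
    stages = [(name, (t1 - t0, 100.0 * (t1 - t0) / total))
              for (name, t0), (_, t1) in zip(checkpoints, checkpoints[1:])]
    return sorted(stages, key=lambda kv: kv[1][0], reverse=True)

# Hypothetical timestamps (seconds) for a few checkpoints of the timeline
checkpoints = [
    ('monitor',   0.00),  # 0-hostdown
    ('inspector', 0.05),  # 1-raw failure
    ('nova',      0.21),  # 4-set VM error
    ('aodh',      0.26),  # 8-fired alarm
    ('end',       0.37),  # 10-handled alarm
]

for name, (cost, pct) in profile(checkpoints):
    print('%-9s %4.0fms %5.1f%%' % (name, cost * 1000, pct))
```

Wired into the verification job, the same reduction could be fed by the timestamps that `monitor.py` and `consumer.py` already log.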
diff --git a/docs/requirements/images/figure1.png b/docs/requirements/images/figure1.png old mode 100755 new mode 100644 index dacf0dd4..267ddddc Binary files a/docs/requirements/images/figure1.png and b/docs/requirements/images/figure1.png differ diff --git a/docs/requirements/images/figure2.png b/docs/requirements/images/figure2.png old mode 100755 new mode 100644 index 3c8a2bf1..9a3b166d Binary files a/docs/requirements/images/figure2.png and b/docs/requirements/images/figure2.png differ diff --git a/tests/consumer.py b/tests/consumer.py index 9b3230fe..3c012b4f 100644 --- a/tests/consumer.py +++ b/tests/consumer.py @@ -11,17 +11,20 @@ import argparse from flask import Flask from flask import request import json +import logger as doctor_log import os import time +LOG = doctor_log.Logger('doctor_consumer').getLogger() + app = Flask(__name__) @app.route('/failure', methods=['POST']) def event_posted(): - app.logger.debug('doctor consumer notified at %s' % time.time()) - app.logger.debug('received data = %s' % request.data) + LOG.info('doctor consumer notified at %s' % time.time()) + LOG.info('received data = %s' % request.data) d = json.loads(request.data) return "OK" @@ -35,7 +38,7 @@ def get_args(): def main(): args = get_args() - app.run(host="0.0.0.0", port=args.port, debug=True) + app.run(host="0.0.0.0", port=args.port) if __name__ == '__main__': diff --git a/tests/functions-common b/tests/functions-common new file mode 100644 index 00000000..db2565a3 --- /dev/null +++ b/tests/functions-common @@ -0,0 +1,72 @@ +#!/bin/bash + +# Test if the named environment variable is set and not zero length +# is_set env-var +function is_set { + local var=\$"$1" + eval "[ -n \"$var\" ]" +} + +# Prints backtrace info +# filename:lineno:function +# backtrace level +function backtrace { + local level=$1 + local deep + deep=$((${#BASH_SOURCE[@]} - 1)) + echo "[Call Trace]" + while [ $level -le $deep ]; do + echo 
"${BASH_SOURCE[$deep]}:${BASH_LINENO[$deep-1]}:${FUNCNAME[$deep-1]}" + deep=$((deep - 1)) + done +} + +# Prints line number and "message" in error format +# err $LINENO "message" +function err { + local exitcode=$? + local xtrace + xtrace=$(set +o | grep xtrace) + set +o xtrace + local msg="[ERROR] ${BASH_SOURCE[2]}:$1 $2" + echo $msg 1>&2; + if [[ -n ${LOGDIR} ]]; then + echo $msg >> "${LOGDIR}/error.log" + fi + $xtrace + return $exitcode +} + +# Prints line number and "message" then exits +# die $LINENO "message" +function die { + local exitcode=$? + set +o xtrace + local line=$1; shift + if [ $exitcode == 0 ]; then + exitcode=1 + fi + backtrace 2 + err $line "$*" + # Give buffers a second to flush + sleep 1 + exit $exitcode +} + +# Checks an environment variable is not set or has length 0 OR if the +# exit code is non-zero and prints "message" and exits +# NOTE: env-var is the variable name without a '$' +# die_if_not_set $LINENO env-var "message" +function die_if_not_set { + local exitcode=$? + local xtrace + xtrace=$(set +o | grep xtrace) + set +o xtrace + local line=$1; shift + local evar=$1; shift + if ! 
is_set $evar || [ $exitcode != 0 ]; then + die $line "$*" + fi + $xtrace +} + diff --git a/tests/inspector.py b/tests/inspector.py index 62614158..129a386a 100644 --- a/tests/inspector.py +++ b/tests/inspector.py @@ -12,6 +12,7 @@ import collections from flask import Flask from flask import request import json +import logger as doctor_log import os import time @@ -19,6 +20,8 @@ import novaclient.client as novaclient import nova_force_down +LOG = doctor_log.Logger('doctor_inspector').getLogger() + class DoctorInspectorSample(object): @@ -44,9 +47,9 @@ class DoctorInspectorSample(object): try: host=server.__dict__.get('OS-EXT-SRV-ATTR:host') self.servers[host].append(server) - app.logger.debug('get hostname=%s from server=%s' % (host, server)) + LOG.debug('get hostname=%s from server=%s' % (host, server)) except Exception as e: - app.logger.debug('can not get hostname from server=%s' % server) + LOG.error('can not get hostname from server=%s' % server) def disable_compute_host(self, hostname): for server in self.servers[hostname]: @@ -63,15 +66,14 @@ class DoctorInspectorSample(object): app = Flask(__name__) -app.debug = True inspector = DoctorInspectorSample() @app.route('/events', methods=['POST']) def event_posted(): - app.logger.debug('event posted at %s' % time.time()) - app.logger.debug('inspector = %s' % inspector) - app.logger.debug('received data = %s' % request.data) + LOG.info('event posted at %s' % time.time()) + LOG.info('inspector = %s' % inspector) + LOG.info('received data = %s' % request.data) d = json.loads(request.data) hostname = d['hostname'] event_type = d['type'] @@ -91,5 +93,6 @@ def main(): args = get_args() app.run(port=args.port) + if __name__ == '__main__': main() diff --git a/tests/logger.py b/tests/logger.py new file mode 100644 index 00000000..a4f33234 --- /dev/null +++ b/tests/logger.py @@ -0,0 +1,47 @@ +############################################################################## +# Copyright (c) 2016 ZTE Corporation and others. 
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+# Usage:
+# import logger as doctor_log
+# logger = doctor_log.Logger("script_name").getLogger()
+# logger.info("message to be shown with - INFO -")
+# logger.debug("message to be shown with - DEBUG -")
+
+import logging
+import os
+
+
+class Logger:
+    def __init__(self, logger_name):
+
+        CI_DEBUG = os.getenv('CI_DEBUG')
+
+        self.logger = logging.getLogger(logger_name)
+        self.logger.propagate = 0
+        self.logger.setLevel(logging.DEBUG)
+
+        formatter = logging.Formatter('%(asctime)s %(filename)s %(lineno)d '
+                                      '%(levelname)-6s %(message)s')
+
+        ch = logging.StreamHandler()
+        ch.setFormatter(formatter)
+        if CI_DEBUG is not None and CI_DEBUG.lower() == "true":
+            ch.setLevel(logging.DEBUG)
+        else:
+            ch.setLevel(logging.INFO)
+        self.logger.addHandler(ch)
+
+        file_handler = logging.FileHandler('%s.log' % logger_name)
+        file_handler.setFormatter(formatter)
+        file_handler.setLevel(logging.DEBUG)
+        self.logger.addHandler(file_handler)
+
+
+    def getLogger(self):
+        return self.logger
+
diff --git a/tests/monitor.py b/tests/monitor.py
index caf4c321..26c911da 100644
--- a/tests/monitor.py
+++ b/tests/monitor.py
@@ -10,6 +10,7 @@
 import argparse
 from datetime import datetime
 import json
+import logger as doctor_log
 import os
 import requests
 import socket
@@ -26,6 +27,8 @@ ICMP_ECHO_MESSAGE = '\x08\x00\xf7\xff\x00\x00\x00\x00'
 
 SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress']
 
+LOG = doctor_log.Logger('doctor_monitor').getLogger()
+
 
 class DoctorMonitorSample(object):
 
     interval = 0.1  # second
@@ -58,8 +61,8 @@
                                 (congress_endpoint, doctor_ds['id']))
 
     def start_loop(self):
-        print "start ping to host %(h)s (ip=%(i)s)" % {'h': 
self.hostname, - 'i': self.ip_addr} + LOG.debug("start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname, + 'i': self.ip_addr}) sock = socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) sock.settimeout(self.timeout) @@ -68,9 +71,9 @@ class DoctorMonitorSample(object): sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0)) data = sock.recv(4096) except socket.timeout: - print "doctor monitor detected at %s" % time.time() + LOG.info("doctor monitor detected at %s" % time.time()) self.report_error() - print "ping timeout, quit monitoring..." + LOG.info("ping timeout, quit monitoring...") return time.sleep(self.interval) diff --git a/tests/run.sh b/tests/run.sh index 99e8feff..b6bfc0ba 100755 --- a/tests/run.sh +++ b/tests/run.sh @@ -32,6 +32,7 @@ INSTALLER_IP=${INSTALLER_IP:-none} SUPPORTED_INSPECTOR_TYPES="sample congress" INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample} +TOP_DIR=$(cd $(dirname "$0") && pwd) ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW @@ -61,10 +62,7 @@ get_installer_ip() { fi if [[ "$INSTALLER_TYPE" != "local" ]] ; then - if [[ -z "$INSTALLER_IP" ]] ; then - echo "ERROR: no installer ip" - exit 1 - fi + die_if_not_set $LINENO INSTALLER_IP "No installer IP" fi } @@ -190,10 +188,7 @@ get_compute_host_info() { COMPUTE_HOST=$(openstack $as_doctor_user server show $VM_NAME | grep "OS-EXT-SRV-ATTR:host" | awk '{ print $4 }') compute_host_in_undercloud=${COMPUTE_HOST%%.*} - if [[ -z "$COMPUTE_HOST" ]] ; then - echo "ERROR: failed to get compute hostname" - exit 1 - fi + die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname" if [[ "$INSTALLER_TYPE" == "apex" ]] ; then COMPUTE_USER=${COMPUTE_USER:-heat-admin} @@ -211,25 +206,20 @@ get_compute_host_info() { COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }') fi - if [[ -z "$COMPUTE_IP" ]]; then - echo "ERROR: Could not resolve $COMPUTE_HOST. 
Either manually set COMPUTE_IP or enable DNS resolution."
-        exit 1
+    die_if_not_set $LINENO COMPUTE_IP "Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
 
     echo "COMPUTE_HOST=$COMPUTE_HOST"
     echo "COMPUTE_IP=$COMPUTE_IP"
 
     # verify connectivity to target compute host
     ping -c 1 "$COMPUTE_IP"
     if [[ $? -ne 0 ]] ; then
-        echo "ERROR: can not ping to computer host"
-        exit 1
+        die $LINENO "Cannot ping compute host"
     fi
 
     # verify ssh to target compute host
     ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit'
     if [[ $? -ne 0 ]] ; then
-        echo "ERROR: can not ssh to computer host"
-        exit 1
+        die $LINENO "Cannot ssh to compute host"
     fi
 }
 
@@ -246,10 +236,7 @@ get_consumer_ip() {
     fi
     echo "CONSUMER_IP=$CONSUMER_IP"
 
-    if [[ -z "$CONSUMER_IP" ]]; then
-        echo "ERROR: Could not get CONSUMER_IP."
-        exit 1
-    fi
+    die_if_not_set $LINENO CONSUMER_IP "Could not get CONSUMER_IP."
 }
 
 download_image() {
@@ -312,12 +299,6 @@ create_alarm() {
               -q "traits.state=string::error; traits.instance_id=string::$vm_id"
 }
 
-print_log() {
-    log_file=$1
-    echo "$log_file:"
-    sed -e 's/^/    /' "$log_file"
-}
-
 start_monitor() {
     pgrep -f "python monitor.py" && return 0
     sudo -E python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "$INSPECTOR_TYPE" \
@@ -327,7 +308,6 @@ stop_monitor() {
     pgrep -f "python monitor.py" || return 0
     sudo kill $(pgrep -f "python monitor.py")
-    print_log monitor.log
 }
 
 congress_add_rule() {
@@ -376,10 +356,11 @@ start_inspector() {
         nova_api_min_version="2.11"
         nova_api_version=$(openstack congress datasource list | \
                            grep nova | grep -Po "(?<='api_version': ')[^']*")
-        [[ -z $nova_api_version ]] && nova_api_version="2.0"
+        if ! 
is_set nova_api_version; then + nova_api_version="2.0" + fi if [[ "$nova_api_version" < "$nova_api_min_version" ]]; then - echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)" - exit 1 + die $LINENO "Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)" fi openstack congress driver list | grep -q " doctor " openstack congress datasource list | grep -q " doctor " || { @@ -393,7 +374,6 @@ stop_inspector() { if [[ "$INSPECTOR_TYPE" == "sample" ]] ; then pgrep -f "python inspector.py" || return 0 kill $(pgrep -f "python inspector.py") - print_log inspector.log elif [[ "$INSPECTOR_TYPE" == "congress" ]] ; then congress_del_rule host_force_down classification congress_del_rule error_vm_states classification @@ -420,10 +400,7 @@ start_consumer() { "fuel node | grep controller | cut -d '|' -f 5|xargs") fi - if [[ -z "$CONTROLLER_IPS" ]]; then - echo "ERROR: Could not get CONTROLLER_IPS." - exit 1 - fi + die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS." 
for ip in $CONTROLLER_IPS do forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT" @@ -436,7 +413,6 @@ start_consumer() { stop_consumer() { pgrep -f "python consumer.py" || return 0 kill $(pgrep -f "python consumer.py") - print_log consumer.log # NOTE(r-mibu): terminate tunnels to the controller nodes if [[ "$INSTALLER_TYPE" != "local" ]] ; then @@ -445,7 +421,6 @@ stop_consumer() { forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT" tunnel_command="sudo ssh $ssh_opts_cpu $COMPUTE_USER@$ip $forward_rule sleep 600" kill $(pgrep -f "$tunnel_command") - print_log "ssh_tunnel.${ip}.log" done fi } @@ -463,12 +438,13 @@ wait_for_vm_launch() { sleep 5 return 0 fi - [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1 + if [[ "$state" == "ERROR" ]]; then + die $LINENO "vm state is ERROR" + fi count=$(($count+1)) sleep 1 done - echo "ERROR: time out while waiting for vm launch" - exit 1 + die $LINENO "Time out while waiting for VM launch" } inject_failure() { @@ -489,11 +465,10 @@ END_TXT } calculate_notification_time() { - detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}') - notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}') + detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $10}') + notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $10}') if ! grep -q "doctor consumer notified at" consumer.log ; then - echo "ERROR: consumer hasn't received fault notification." - exit 1 + die $LINENO "Consumer hasn't received fault notification." 
fi echo "$notified $detected" | \ awk '{ @@ -509,14 +484,11 @@ check_host_status() { host_status_line=$(openstack $as_doctor_user --os-compute-api-version 2.16 \ server show $VM_NAME | grep "host_status") host_status=$(echo $host_status_line | awk '{print $4}') - if [ -z "$host_status" ] ; then - echo "ERROR: host_status not reported by: nova show $VM_NAME" - exit 1 - elif [[ "$expected_state" =~ "$host_status" ]] ; then + die_if_not_set $LINENO host_status "host_status not reported by: nova show $VM_NAME" + if [[ "$expected_state" =~ "$host_status" ]] ; then echo "$VM_NAME showing host_status: $host_status" else - echo "ERROR: host_status:$host_status not equal to expected_state: $expected_state" - exit 1 + die $LINENO "host_status:$host_status not equal to expected_state: $expected_state" fi } @@ -532,7 +504,6 @@ cleanup() { sleep 240 check_host_status "UP" scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" . - print_log disable_network.log openstack $as_doctor_user server list | grep -q " $VM_NAME " && openstack $as_doctor_user server delete "$VM_NAME" sleep 1 @@ -560,6 +531,8 @@ echo "Note: doctor/tests/run.sh has been executed." trap cleanup EXIT +source $TOP_DIR/functions-common + echo "preparing test env..." get_installer_ip prepare_ssh_to_cloud
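The run.sh changes above route all error handling through the new functions-common helpers. A standalone sketch of the calling pattern, with simplified stand-ins for `die` and `die_if_not_set` (the real helpers, sourced via `source $TOP_DIR/functions-common`, additionally print a backtrace and append to `${LOGDIR}/error.log`):

```shell
#!/bin/sh
# Simplified stand-ins for the functions-common helpers, for illustration only.
die() {
    line=$1; shift
    echo "[ERROR] line $line: $*" 1>&2
    exit 1
}

# Exits with an error when the named variable (name without '$') is unset or empty.
die_if_not_set() {
    line=$1; shift
    evar=$1; shift
    if ! eval "[ -n \"\$$evar\" ]"; then
        die "$line" "$*"
    fi
}

# Happy path: the variable is set, so execution continues.
COMPUTE_HOST="overcloud-novacompute-0"
die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname"
echo "COMPUTE_HOST=$COMPUTE_HOST"

# Failure path, run in a subshell so this demo script itself keeps going.
status=0
(INSTALLER_IP=""; die_if_not_set $LINENO INSTALLER_IP "No installer IP" 2>/dev/null) || status=$?
echo "missing INSTALLER_IP exits with status $status"
```

This is the same one-line shape the refactored run.sh uses, e.g. `die_if_not_set $LINENO INSTALLER_IP "No installer IP"` in `get_installer_ip`.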