Merge "Several updates to the alarm comparison table in Section 5.5.3"

author Carlos Goncalves <carlos.goncalves@neclab.eu>

Thu, 19 Jan 2017 09:06:13 +0000 (09:06 +0000)

committer Gerrit Code Review <gerrit@opnfv.org>

Thu, 19 Jan 2017 09:06:13 +0000 (09:06 +0000)
author Carlos Goncalves <carlos.goncalves@neclab.eu>
Thu, 19 Jan 2017 09:06:13 +0000 (09:06 +0000)
committer Gerrit Code Review <gerrit@opnfv.org>
Thu, 19 Jan 2017 09:06:13 +0000 (09:06 +0000)
diff --git a/INFO b/INFO

index 08c017f..bb91b30 100644 (file)
--- a/INFO
+++ b/INFO
@@ -21,7 +21,6 @@ Peter Lee (Corenova Technologies, peter@corenova.com)
  Ryota Mibu (NEC, r-mibu@cq.jp.nec.com)
  Serge Manning (Sprint, Serge.Manning@sprint.com)
  Tomi Juvonen (Nokia, tomi.juvonen@nokia.com)
-Tommy Lindgren (Ericsson, tommy.lindgren@ericsson.com)
  Uli Kleber (Huawei, ulrich.kleber@huawei.com)
  
  Link to TSC approval of the project: http://meetbot.opnfv.org/meetings/opnfv-meeting/2014/opnfv-meeting.2014-12-02-14.58.html
diff --git a/docs/requirements/03-architecture.rst b/docs/requirements/03-architecture.rst

index 9f620e6..2774df0 100644 (file)
--- a/docs/requirements/03-architecture.rst
+++ b/docs/requirements/03-architecture.rst
@@ -191,11 +191,15 @@ fencing, but there has not been any progress. The general description is
  available here:
  https://wiki.openstack.org/wiki/Fencing_Instances_of_an_Unreachable_Host
  
-As OpenStack does not cover fencing it is in the responsibility of the Doctor
-project to make sure fencing is done by using tools like pacemaker and by
-calling OpenStack APIs. Only after fencing is done OpenStack resources can be
-marked as down. In case there are gaps in OpenStack projects to have all
-relevant resources marked as down, those gaps need to be identified and fixed.
+OpenStack provides some mechanisms that allow fencing of faulty resources. Some
+are automatically invoked by the platform itself (e.g. Nova disables the
+compute service when libvirtd stops running, preventing new VMs from being
+scheduled to that node), while other mechanisms are consumer-triggered actions
+(e.g. Neutron port admin-state-up). For other fencing actions not supported by
+OpenStack, the Doctor project may suggest ways to address the gap (e.g. by
+resorting to external tools and orchestration methods), or may document or
+implement them upstream.
+
  The Doctor Inspector component will be responsible of marking resources down in
  the OpenStack and back up if necessary.
  
@@ -206,11 +210,11 @@ In the basic :ref:`uc-fault1` use case, no automatic actions will be taken by
  the VIM, but all recovery actions executed by the VIM and the NFVI will be
  instructed and coordinated by the Consumer.
  
-In a more advanced use case, the VIM shall be able to recover the failed virtual
+In a more advanced use case, the VIM may be able to recover the failed virtual
  resources according to a pre-defined behavior for that resource. In principle
  this means that the owner of the resource (i.e., its consumer or administrator)
  can define which recovery actions shall be taken by the VIM. Examples are a
-restart of the VM, migration/evacuation of the VM, or no action.
+restart of the VM or migration/evacuation of the VM.
  
  
  
diff --git a/tests/inspector.py b/tests/inspector.py

index 3a0e9ad..c1f9569 100644 (file)
--- a/tests/inspector.py
+++ b/tests/inspector.py
@@ -54,6 +54,7 @@ class DoctorInspectorSample(object):
      def disable_compute_host(self, hostname):
          for server in self.servers[hostname]:
              self.nova.servers.reset_state(server, 'error')
+            LOG.info('doctor mark vm(%s) error at %s' % (server, time.time()))
  
          # NOTE: We use our own client here instead of this novaclient for a
          #       workaround.  Once keystone provides v2.1 nova api endpoint
@@ -63,6 +64,7 @@ class DoctorInspectorSample(object):
          # self.nova.services.force_down(hostname, 'nova-compute', True)
          #
          nova_force_down.force_down(hostname)
+        LOG.info('doctor mark host(%s) down at %s' % (hostname, time.time()))
  
  
  app = Flask(__name__)
diff --git a/tests/profiler-poc.py b/tests/profiler-poc.py

index f20cad1..7103478 100644 (file)
--- a/tests/profiler-poc.py
+++ b/tests/profiler-poc.py
@@ -18,8 +18,10 @@ Valid check points are: DOCTOR_PROFILER_T{00-09}
  See also: https://goo.gl/98Osig
  """
  
+import json
  import os
  
+LOGFILE = 'performance-profile'
  PREFIX = 'DOCTOR_PROFILER'
  TOTAL_CHECK_POINTS = 10
  MODULE_CHECK_POINTS = ['T00', 'T01', 'T04', 'T05', 'T06', 'T09']
@@ -34,8 +36,8 @@ Total time cost: {total}(ms)
  host down:{T00}|      |      |      |          |        |      |      |      |
       raw failure:{T01}|      |      |          |        |      |      |      |
           found affected:{T02}|      |          |        |      |      |      |
-              marked host down:{T03}|          |        |      |      |      |
-                         set VM error:{T04}    |        |      |      |      |
+                  set VM error:{T03}|          |        |      |      |      |
+                         marked host down:{T04}|        |      |      |      |
                                 notified VM error:{T05}  |      |      |      |
                                          transformed event:{T06}|      |      |
                                                   evaluated event:{T07}|      |
@@ -76,6 +78,9 @@ def main():
  
      profile = TEMPLATE.format(**tags)
  
+    logfile = open('{}.json'.format(LOGFILE), 'w')
+    logfile.write(json.dumps(tags))
+
      print profile
  
  if __name__ == '__main__':
diff --git a/tests/run.sh b/tests/run.sh

index 1b21f09..206f6a4 100755 (executable)
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -27,6 +27,7 @@ DOCTOR_PW=doctor
  DOCTOR_PROJECT=doctor
  #TODO: change back to `_member_` when JIRA DOCTOR-55 is done
  DOCTOR_ROLE=admin
+PROFILER_TYPE=${PROFILER_TYPE:-none}
  
  TOP_DIR=$(cd $(dirname "$0") && pwd)
  
@@ -248,21 +249,36 @@ END_TXT
  }
  
  profile_performance_poc() {
-    total=`python -c "print(int(($notified-$detected)*1000))"`
-
+    triggered=$(grep "^doctor set host down at" disable_network.log |\
+                sed -e "s/^.* at //")
+    vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
+               sed -e "s/^.* at //")
+    hostdown=$(grep "doctor mark host.* down at" inspector.log |\
+               sed -e "s/^.* at //")
+
+    #calculate the relative interval to triggered(T00)
      export DOCTOR_PROFILER_T00=0
-    export DOCTOR_PROFILER_T09=$((total))
+    export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc)
+    export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc)
+    export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc)
+    export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc)
+
      python profiler-poc.py
  }
  
  calculate_notification_time() {
-    detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $10}')
-    notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $10}')
      if ! grep -q "doctor consumer notified at" consumer.log ; then
          die $LINENO "Consumer hasn't received fault notification."
      fi
  
-    if [[ PROFILER == 'poc' ]]; then
+    #keep 'at' as the last keyword just before the value, and
+    #use regex to get value instead of the fixed column
+    detected=$(grep "doctor monitor detected at" monitor.log |\
+               sed -e "s/^.* at //")
+    notified=$(grep "doctor consumer notified at" consumer.log |\
+               sed -e "s/^.* at //")
+
+    if [[ "$PROFILER_TYPE" == "poc" ]]; then
          profile_performance_poc
      fi
author	Carlos Goncalves <carlos.goncalves@neclab.eu>
	Thu, 19 Jan 2017 09:06:13 +0000 (09:06 +0000)
committer	Gerrit Code Review <gerrit@opnfv.org>
	Thu, 19 Jan 2017 09:06:13 +0000 (09:06 +0000)
INFO		patch \| blob \| history
docs/requirements/03-architecture.rst		patch \| blob \| history
tests/inspector.py		patch \| blob \| history
tests/profiler-poc.py		patch \| blob \| history
tests/run.sh		patch \| blob \| history