Fix bugs in profiler

author Yujun Zhang <zhang.yujunz@zte.com.cn>

Sat, 21 Jan 2017 14:30:05 +0000 (22:30 +0800)

committer Yujun Zhang <zhang.yujunz@zte.com.cn>

Tue, 31 Jan 2017 15:23:52 +0000 (23:23 +0800)
author Yujun Zhang <zhang.yujunz@zte.com.cn>
Sat, 21 Jan 2017 14:30:05 +0000 (22:30 +0800)
committer Yujun Zhang <zhang.yujunz@zte.com.cn>
Tue, 31 Jan 2017 15:23:52 +0000 (23:23 +0800)
diff --git a/tests/profiler-poc.py b/tests/profiler-poc.py

index 7103478..408cb09 100644 (file)
--- a/tests/profiler-poc.py
+++ b/tests/profiler-poc.py
@@ -21,7 +21,7 @@ See also: https://goo.gl/98Osig
  import json
  import os
  
-LOGFILE = 'performance-profile'
+OUTPUT = 'doctor_profiling_output'
  PREFIX = 'DOCTOR_PROFILER'
  TOTAL_CHECK_POINTS = 10
  MODULE_CHECK_POINTS = ['T00', 'T01', 'T04', 'T05', 'T06', 'T09']
@@ -33,7 +33,7 @@ Total time cost: {total}(ms)
         |Monitor|Inspector           |Controller|Notifier|Evaluator           |
         |{M00}  |{M01}               |{M02}     |{M03}   |{M04}               |
         |       |      |      |      |          |        |      |      |      |
-host down:{T00}|      |      |      |          |        |      |      |      |
+link down:{T00}|      |      |      |          |        |      |      |      |
       raw failure:{T01}|      |      |          |        |      |      |      |
           found affected:{T02}|      |          |        |      |      |      |
                    set VM error:{T03}|          |        |      |      |      |
@@ -78,7 +78,7 @@ def main():
  
      profile = TEMPLATE.format(**tags)
  
-    logfile = open('{}.json'.format(LOGFILE), 'w')
+    logfile = open('{}.json'.format(OUTPUT), 'w')
      logfile.write(json.dumps(tags))
  
      print profile
diff --git a/tests/run.sh b/tests/run.sh

index e7afb51..a26fafe 100755 (executable)
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -238,7 +238,7 @@ inject_failure() {
  dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}')
  sleep 1
  sudo ip link set $dev down
-echo "doctor set host down at" $(date "+%s.%N")
+echo "doctor set link down at" $(date "+%s.%N")
  sleep 180
  sudo ip link set $dev up
  sleep 1
@@ -247,24 +247,8 @@ END_TXT
      chmod +x disable_network.sh
      scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
      ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
-}
-
-profile_performance_poc() {
-    triggered=$(grep "^doctor set host down at" disable_network.log |\
-                sed -e "s/^.* at //")
-    vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
-               sed -e "s/^.* at //")
-    hostdown=$(grep "doctor mark host.* down at" inspector.log |\
-               sed -e "s/^.* at //")
-
-    #calculate the relative interval to triggered(T00)
-    export DOCTOR_PROFILER_T00=0
-    export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc)
-    export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc)
-    export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc)
-    export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc)
-
-    python profiler-poc.py
+    # use host time to get rid of potential time sync deviation between nodes
+    triggered=$(date "+%s.%N")
  }
  
  calculate_notification_time() {
@@ -279,10 +263,6 @@ calculate_notification_time() {
      notified=$(grep "doctor consumer notified at" consumer.log |\
                 sed -e "s/^.* at //")
  
-    if [[ "$PROFILER_TYPE" == "poc" ]]; then
-        profile_performance_poc
-    fi
-
      echo "$notified $detected" | \
          awk '{
              d = $1 - $2;
@@ -317,6 +297,43 @@ unset_forced_down_hosts() {
                  -f value -c State | grep -q down' 240 5
  }
  
+collect_logs() {
+    unset_forced_down_hosts
+    # TODO: We need to make sure the target compute host is back to IP
+    #       reachable. wait_ping() will be added by tojuvone .
+    sleep 110
+    scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
+
+    # TODO(yujunz) collect other logs, e.g. nova, aodh
+}
+
+run_profiler() {
+    if [[ "$PROFILER_TYPE" == "poc" ]]; then
+        linkdown=$(grep "doctor set link down at " disable_network.log |\
+                  sed -e "s/^.* at //")
+        vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
+                 sed -e "s/^.* at //")
+        hostdown=$(grep "doctor mark host.* down at" inspector.log |\
+                 sed -e "s/^.* at //")
+
+        # TODO(yujunz) check the actual delay to verify time sync status
+        # expected ~1s delay from $trigger to $linkdown
+        relative_start=${linkdown}
+        export DOCTOR_PROFILER_T00=$(python -c \
+          "print(int(($linkdown-$relative_start)*1000))")
+        export DOCTOR_PROFILER_T01=$(python -c \
+          "print(int(($detected-$relative_start)*1000))")
+        export DOCTOR_PROFILER_T03=$(python -c \
+          "print(int(($vmdown-$relative_start)*1000))")
+        export DOCTOR_PROFILER_T04=$(python -c \
+          "print(int(($hostdown-$relative_start)*1000))")
+        export DOCTOR_PROFILER_T09=$(python -c \
+          "print(int(($notified-$relative_start)*1000))")
+
+        python profiler-poc.py >doctor_profiler.log 2>&1
+    fi
+}
+
  cleanup() {
      set +e
      echo "cleanup..."
@@ -396,5 +413,7 @@ sleep 60
  
  check_host_status "(DOWN|UNKNOWN)"
  calculate_notification_time
+collect_logs
+run_profiler
  
  echo "done"
author	Yujun Zhang <zhang.yujunz@zte.com.cn>
	Sat, 21 Jan 2017 14:30:05 +0000 (22:30 +0800)
committer	Yujun Zhang <zhang.yujunz@zte.com.cn>
	Tue, 31 Jan 2017 15:23:52 +0000 (23:23 +0800)
tests/profiler-poc.py		patch \| blob \| history
tests/run.sh		patch \| blob \| history