Merge "Fix bugs in profiler"
author: Tomi Juvonen <tomi.juvonen@nokia.com>
Thu, 2 Feb 2017 15:20:38 +0000 (15:20 +0000)
committer: Gerrit Code Review <gerrit@opnfv.org>
Thu, 2 Feb 2017 15:20:38 +0000 (15:20 +0000)
1  2 
tests/run.sh

diff --combined tests/run.sh
@@@ -179,7 -179,8 +179,7 @@@ start_consumer() 
                               nova list | grep ' overcloud-controller-[0-9] ' \
                               | sed -e 's/^.*ctlplane=//' -e 's/ *|\$//'")
          elif is_installer fuel; then
 -            CONTROLLER_IPS=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
 -                            "fuel node | grep controller | cut -d '|' -f 5|xargs")
 +            get_controller_ips
          fi
  
          die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS."
@@@ -235,10 -236,9 +235,10 @@@ inject_failure() 
      cat > disable_network.sh << 'END_TXT'
  #!/bin/bash -x
  dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}')
 +[[ -n "$dev" ]] || dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $5}')
  sleep 1
  sudo ip link set $dev down
- echo "doctor set host down at" $(date "+%s.%N")
+ echo "doctor set link down at" $(date "+%s.%N")
  sleep 180
  sudo ip link set $dev up
  sleep 1
@@@ -247,24 -247,8 +247,8 @@@ END_TX
      chmod +x disable_network.sh
      scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
      ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
- }
- profile_performance_poc() {
-     triggered=$(grep "^doctor set host down at" disable_network.log |\
-                 sed -e "s/^.* at //")
-     vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
-                sed -e "s/^.* at //")
-     hostdown=$(grep "doctor mark host.* down at" inspector.log |\
-                sed -e "s/^.* at //")
-     #calculate the relative interval to triggered(T00)
-     export DOCTOR_PROFILER_T00=0
-     export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc)
-     export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc)
-     export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc)
-     export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc)
-     python profiler-poc.py
+     # use host time to get rid of potential time sync deviation between nodes
+     triggered=$(date "+%s.%N")
  }
  
  calculate_notification_time() {
      notified=$(grep "doctor consumer notified at" consumer.log |\
                 sed -e "s/^.* at //")
  
-     if [[ "$PROFILER_TYPE" == "poc" ]]; then
-         profile_performance_poc
-     fi
      echo "$notified $detected" | \
          awk '{
              d = $1 - $2;
@@@ -309,8 -289,6 +289,8 @@@ unset_forced_down_hosts() 
      for host in $(openstack compute service list --service nova-compute \
                    -f value -c Host -c State | sed -n -e '/down$/s/ *down$//p')
      do
 +        # TODO (r-mibu): make sample inspector use keystone v3 api
 +        OS_AUTH_URL=${OS_AUTH_URL/v3/v2.0} \
          python ./nova_force_down.py $host --unset
      done
  
                  -f value -c State | grep -q down' 240 5
  }
  
+ collect_logs() {
+     unset_forced_down_hosts
+     # TODO: We need to make sure the target compute host is back to IP
+     #       reachable. wait_ping() will be added by tojuvone .
+     sleep 110
+     scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
+     # TODO(yujunz) collect other logs, e.g. nova, aodh
+ }
+ run_profiler() {
+     if [[ "$PROFILER_TYPE" == "poc" ]]; then
+         linkdown=$(grep "doctor set link down at " disable_network.log |\
+                   sed -e "s/^.* at //")
+         vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
+                  sed -e "s/^.* at //")
+         hostdown=$(grep "doctor mark host.* down at" inspector.log |\
+                  sed -e "s/^.* at //")
+         # TODO(yujunz) check the actual delay to verify time sync status
+         # expected ~1s delay from $trigger to $linkdown
+         relative_start=${linkdown}
+         export DOCTOR_PROFILER_T00=$(python -c \
+           "print(int(($linkdown-$relative_start)*1000))")
+         export DOCTOR_PROFILER_T01=$(python -c \
+           "print(int(($detected-$relative_start)*1000))")
+         export DOCTOR_PROFILER_T03=$(python -c \
+           "print(int(($vmdown-$relative_start)*1000))")
+         export DOCTOR_PROFILER_T04=$(python -c \
+           "print(int(($hostdown-$relative_start)*1000))")
+         export DOCTOR_PROFILER_T09=$(python -c \
+           "print(int(($notified-$relative_start)*1000))")
+         python profiler-poc.py >doctor_profiler.log 2>&1
+     fi
+ }
  cleanup() {
      set +e
      echo "cleanup..."
@@@ -398,5 -413,7 +415,7 @@@ sleep 6
  
  check_host_status "(DOWN|UNKNOWN)"
  calculate_notification_time
+ collect_logs
+ run_profiler
  
  echo "done"