From: Tomi Juvonen Date: Thu, 2 Feb 2017 15:20:38 +0000 (+0000) Subject: Merge "Fix bugs in profiler" X-Git-Tag: danube.1.RC1~16 X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=commitdiff_plain;h=a65bd8e7807e486561017a716468d52a0ba144f9;hp=-c;p=doctor.git Merge "Fix bugs in profiler" --- a65bd8e7807e486561017a716468d52a0ba144f9 diff --combined tests/run.sh index 227c6ea1,a26fafee..70086502 --- a/tests/run.sh +++ b/tests/run.sh @@@ -179,7 -179,8 +179,7 @@@ start_consumer() nova list | grep ' overcloud-controller-[0-9] ' \ | sed -e 's/^.*ctlplane=//' -e 's/ *|\$//'") elif is_installer fuel; then - CONTROLLER_IPS=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \ - "fuel node | grep controller | cut -d '|' -f 5|xargs") + get_controller_ips fi die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS." @@@ -235,10 -236,9 +235,10 @@@ inject_failure() cat > disable_network.sh << 'END_TXT' #!/bin/bash -x dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}') +[[ -n "$dev" ]] || dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $5}') sleep 1 sudo ip link set $dev down - echo "doctor set host down at" $(date "+%s.%N") + echo "doctor set link down at" $(date "+%s.%N") sleep 180 sudo ip link set $dev up sleep 1 @@@ -247,24 -247,8 +247,8 @@@ END_TX chmod +x disable_network.sh scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:" ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &' - } - - profile_performance_poc() { - triggered=$(grep "^doctor set host down at" disable_network.log |\ - sed -e "s/^.* at //") - vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\ - sed -e "s/^.* at //") - hostdown=$(grep "doctor mark host.* down at" inspector.log |\ - sed -e "s/^.* at //") - - #calculate the relative interval to triggered(T00) - export DOCTOR_PROFILER_T00=0 - export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc) - export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc) - export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc) - export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc) - - python profiler-poc.py + # use host time to get rid of potential time sync deviation between nodes + triggered=$(date "+%s.%N") } calculate_notification_time() { @@@ -279,10 -263,6 +263,6 @@@ notified=$(grep "doctor consumer notified at" consumer.log |\ sed -e "s/^.* at //") - if [[ "$PROFILER_TYPE" == "poc" ]]; then - profile_performance_poc - fi - echo "$notified $detected" | \ awk '{ d = $1 - $2; @@@ -309,8 -289,6 +289,8 @@@ unset_forced_down_hosts() for host in $(openstack compute service list --service nova-compute \ -f value -c Host -c State | sed -n -e '/down$/s/ *down$//p') do + # TODO (r-mibu): make sample inspector use keystone v3 api + OS_AUTH_URL=${OS_AUTH_URL/v3/v2.0} \ python ./nova_force_down.py $host --unset done @@@ -319,6 -297,43 +299,43 @@@ -f value -c State | grep -q down' 240 5 } + collect_logs() { + unset_forced_down_hosts + # TODO: We need to make sure the target compute host is back to IP + # reachable. wait_ping() will be added by tojuvone . + sleep 110 + scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" . + + # TODO(yujunz) collect other logs, e.g. nova, aodh + } + + run_profiler() { + if [[ "$PROFILER_TYPE" == "poc" ]]; then + linkdown=$(grep "doctor set link down at " disable_network.log |\ + sed -e "s/^.* at //") + vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\ + sed -e "s/^.* at //") + hostdown=$(grep "doctor mark host.* down at" inspector.log |\ + sed -e "s/^.* at //") + + # TODO(yujunz) check the actual delay to verify time sync status + # expected ~1s delay from $trigger to $linkdown + relative_start=${linkdown} + export DOCTOR_PROFILER_T00=$(python -c \ + "print(int(($linkdown-$relative_start)*1000))") + export DOCTOR_PROFILER_T01=$(python -c \ + "print(int(($detected-$relative_start)*1000))") + export DOCTOR_PROFILER_T03=$(python -c \ + "print(int(($vmdown-$relative_start)*1000))") + export DOCTOR_PROFILER_T04=$(python -c \ + "print(int(($hostdown-$relative_start)*1000))") + export DOCTOR_PROFILER_T09=$(python -c \ + "print(int(($notified-$relative_start)*1000))") + + python profiler-poc.py >doctor_profiler.log 2>&1 + fi + } + cleanup() { set +e echo "cleanup..." @@@ -398,5 -413,7 +415,7 @@@ sleep 6 check_host_status "(DOWN|UNKNOWN)" calculate_notification_time + collect_logs + run_profiler echo "done"