DOCTOR_PROJECT=doctor
#TODO: change back to `_member_` when JIRA DOCTOR-55 is done
DOCTOR_ROLE=admin
+PROFILER_TYPE=${PROFILER_TYPE:-none}
TOP_DIR=$(cd $(dirname "$0") && pwd)
-ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW
--os-tenant-name $DOCTOR_PROJECT"
compute_host_in_undercloud=${COMPUTE_HOST%%.*}
die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname"
- if is_installer apex; then
- COMPUTE_USER=${COMPUTE_USER:-heat-admin}
- COMPUTE_IP=$(sudo ssh $ssh_opts $INSTALLER_IP \
- "source stackrc; \
- nova show $compute_host_in_undercloud \
- | awk '/ ctlplane network /{print \$5}'")
- elif is_installer fuel; then
- COMPUTE_USER=${COMPUTE_USER:-root}
- node_id=$(echo $compute_host_in_undercloud | cut -d "-" -f 2)
- COMPUTE_IP=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
- "fuel node|awk -F '|' -v id=$node_id '{if (\$1 == id) print \$5}' |xargs")
- elif is_installer local; then
- COMPUTE_USER=${COMPUTE_USER:-$(whoami)}
- COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
- fi
+ get_compute_ip_from_hostname $COMPUTE_HOST
- die_if_not_set $LINENO COMPUTE_IP "Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
echo "COMPUTE_HOST=$COMPUTE_HOST"
echo "COMPUTE_IP=$COMPUTE_IP"
fi
}
-get_consumer_ip() {
+# TODO(r-mibu): update this function to support consumer instance
+# and migrate this function into installer lib
+get_consumer_ip___to_be_removed() {
local get_consumer_command="ip route get $COMPUTE_IP | awk '/ src /{print \$NF}'"
if is_installer apex; then
CONSUMER_IP=$(sudo ssh $ssh_opts root@$INSTALLER_IP \
# avoid some network problems that depend on infra and installers.
# This tunnel will be terminated by stop_consumer() or after 10 mins passed.
if ! is_installer local; then
- if is_installer apex; then
- CONTROLLER_IPS=$(sudo ssh $ssh_opts $INSTALLER_IP \
- "source stackrc; \
- nova list | grep ' overcloud-controller-[0-9] ' \
- | sed -e 's/^.*ctlplane=//' -e 's/ *|\$//'")
- elif is_installer fuel; then
- CONTROLLER_IPS=$(sshpass -p r00tme ssh 2>/dev/null $ssh_opts root@${INSTALLER_IP} \
- "fuel node | grep controller | cut -d '|' -f 5|xargs")
- fi
-
- die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS."
for ip in $CONTROLLER_IPS
do
forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT"
return 0
fi
if [[ "$state" == "ERROR" ]]; then
+ openstack $as_doctor_user server show $VM_NAME
die $LINENO "vm state is ERROR"
fi
count=$(($count+1))
cat > disable_network.sh << 'END_TXT'
#!/bin/bash -x
dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $7}')
+[[ -n "$dev" ]] || dev=$(sudo ip a | awk '/ @COMPUTE_IP@\//{print $5}')
sleep 1
sudo ip link set $dev down
echo "doctor set host down at" $(date "+%s.%N")
}
profile_performance_poc() { # export DOCTOR_PROFILER_T* checkpoints relative to the fault trigger, then run profiler-poc.py
- total=`python -c "print(int(($notified-$detected)*1000))"`
-
+ triggered=$(grep "^doctor set host down at" disable_network.log |\
+ sed -e "s/^.* at //") # greedy match: keep only the value after the last " at "
+ vmdown=$(grep "doctor mark vm.* error at" inspector.log |tail -n 1 |\
+ sed -e "s/^.* at //") # tail -n 1: only the last matching inspector.log line is used
+ hostdown=$(grep "doctor mark host.* down at" inspector.log |\
+ sed -e "s/^.* at //") # no tail here: every matching line is kept
+
+ # Calculate each interval relative to the trigger time (T00). $detected and $notified are not set here; they must already be set by the caller. Values are milliseconds, assuming all timestamps are in seconds -- TODO confirm.
export DOCTOR_PROFILER_T00=0
- export DOCTOR_PROFILER_T09=$((total))
+ export DOCTOR_PROFILER_T01=$(echo "($detected-$triggered)*1000/1" |bc) # monitor detection; "/1" makes bc truncate to an integer (default scale is 0)
+ export DOCTOR_PROFILER_T03=$(echo "($vmdown-$triggered)*1000/1" |bc) # inspector marked the VM as error
+ export DOCTOR_PROFILER_T04=$(echo "($hostdown-$triggered)*1000/1" |bc) # inspector marked the host as down
+ export DOCTOR_PROFILER_T09=$(echo "($notified-$triggered)*1000/1" |bc) # consumer notification
+
python profiler-poc.py # presumably reads the DOCTOR_PROFILER_T* variables exported above -- verify in profiler-poc.py
}
calculate_notification_time() {
- detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $10}')
- notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $10}')
if ! grep -q "doctor consumer notified at" consumer.log ; then
die $LINENO "Consumer hasn't received fault notification."
fi
- if [[ PROFILER == 'poc' ]]; then
+ #keep 'at' as the last keyword just before the value, and
+ #use regex to get value instead of the fixed column
+ detected=$(grep "doctor monitor detected at" monitor.log |\
+ sed -e "s/^.* at //")
+ notified=$(grep "doctor consumer notified at" consumer.log |\
+ sed -e "s/^.* at //")
+
+ if [[ "$PROFILER_TYPE" == "poc" ]]; then
profile_performance_poc
fi
fi
}
+unset_forced_down_hosts() {
+ for host in $(openstack compute service list --service nova-compute \
+ -f value -c Host -c State | sed -n -e '/down$/s/ *down$//p')
+ do
+ # TODO (r-mibu): make sample inspector use keystone v3 api
+ OS_AUTH_URL=${OS_AUTH_URL/v3/v2.0} \
+ python ./nova_force_down.py $host --unset
+ done
+
+ echo "waiting disabled compute host back to be enabled..."
+ wait_until 'openstack compute service list --service nova-compute
+ -f value -c State | grep -q down' 240 5
+}
+
cleanup() {
set +e
echo "cleanup..."
stop_inspector
stop_consumer
- echo "waiting disabled compute host back to be enabled..."
- python ./nova_force_down.py "$COMPUTE_HOST" --unset
- sleep 240
- check_host_status "UP"
- scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
+ unset_forced_down_hosts
+ # TODO: We need to make sure the target compute host is IP reachable
+ # again. wait_ping() will be added by tojuvone.
+ sleep 110
+ if is_set COMPUTE_IP; then
+ scp $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP:disable_network.log" .
+ fi
openstack $as_doctor_user server list | grep -q " $VM_NAME " && openstack $as_doctor_user server delete "$VM_NAME"
sleep 1
# Main process
echo "Note: doctor/tests/run.sh has been executed."
+git log --oneline -1 || true # ignore errors even if you don't have git installed
trap cleanup EXIT