2 ##############################################################################
3 # Copyright (c) 2016 NEC Corporation and others.
5 # All rights reserved. This program and the accompanying materials
6 # are made available under the terms of the Apache License, Version 2.0
7 # which accompanies this distribution, and is available at
8 # http://www.apache.org/licenses/LICENSE-2.0
9 ##############################################################################
# Command tracing is on by default; set CI_DEBUG to anything other than
# "true" to turn it off.
if [[ "${CI_DEBUG:-true}" == "true" ]]; then
    set -x
fi

# Test image: where it is downloaded from and the local file it is stored in.
IMAGE_URL="https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img"
IMAGE_FILE="$IMAGE_NAME.img"

# Name under which the test alarm is created and looked up.
ALARM_NAME="doctor_alarm1"

# Deployment settings. Each of the following keeps a value already present in
# the environment and otherwise falls back to the default shown here.
# "none" for an IP address means "not provided".
SUPPORTED_INSTALLER_TYPES="apex local"
: "${INSTALLER_TYPE:=apex}"
: "${INSTALLER_IP:=none}"
: "${COMPUTE_HOST:=overcloud-novacompute-0}"
: "${COMPUTE_IP:=none}"
: "${COMPUTE_USER:=heat-admin}"

# Common ssh/scp options: do not record host keys and do not prompt for
# unknown hosts.
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
35 if [[ ! "$SUPPORTED_INSTALLER_TYPES" =~ "$INSTALLER_TYPE" ]] ; then
36 echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
40 prepare_compute_ssh() {
41 ssh_opts_cpu="$ssh_opts"
43 if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
44 if [[ "$INSTALLER_IP" == "none" ]] ; then
45 instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
46 INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
49 if [[ "$COMPUTE_IP" == "none" ]] ; then
50 COMPUTE_IP=$(sudo ssh $ssh_opts $INSTALLER_IP \
52 nova show $COMPUTE_HOST \
53 | awk '/ ctlplane network /{print \$5}'")
56 # get ssh key from installer node
57 sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
58 sudo chown $(whoami):$(whoami) instack_key
60 ssh_opts_cpu+=" -i instack_key"
61 elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
62 if [[ "$COMPUTE_IP" == "none" ]] ; then
63 COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
64 if [[ -z "$COMPUTE_IP" ]]; then
65 echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
70 echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
73 # verify connectivity to target compute host
74 ping -c 1 "$COMPUTE_IP"
# Fetch the test image from IMAGE_URL into IMAGE_FILE unless a local copy
# already exists.
[ -e "$IMAGE_FILE" ] && return 0
# -O names the output document. Lowercase -o only names wget's log file, which
# would leave the image saved under its remote name and IMAGE_FILE filled
# with log text.
if ! wget "$IMAGE_URL" -O "$IMAGE_FILE" ; then
    # wget -O creates the target even when the transfer fails; remove it so
    # the existence check above does not accept a broken image next time.
    rm -f -- "$IMAGE_FILE"
    return 1
fi
83 glance image-list | grep -q " $IMAGE_NAME " && return 0
84 glance image-create --name "$IMAGE_NAME" \
86 --disk-format "$IMAGE_FORMAT" \
87 --container-format bare \
92 openstack user list | grep -q "$TEST_USER" || {
93 openstack user create "$TEST_USER" --password "$TEST_PW"
95 openstack project list | grep -q "$TEST_PROJECT" || {
96 openstack project create "$TEST_PROJECT"
98 openstack user role list "$TEST_USER" --project "$TEST_PROJECT" \
99 | grep -q "$TEST_ROLE" || {
100 openstack role add "$TEST_ROLE" --user "$TEST_USER" \
101 --project "$TEST_PROJECT"
106 nova list | grep -q " $VM_NAME " && return 0
# Boot the test VM as the test user so that non-admin access is exercised.
109 export OS_USERNAME="$TEST_USER"
110 export OS_PASSWORD="$TEST_PW"
111 export OS_TENANT_NAME="$TEST_PROJECT"
112 nova boot --flavor "$VM_FLAVOR" \
113 --image "$IMAGE_NAME" \
121 ceilometer alarm-list | grep -q " $ALARM_NAME " && return 0
122 vm_id=$(nova list | grep " $VM_NAME " | awk '{print $2}')
123 ceilometer alarm-event-create --name "$ALARM_NAME" \
124 --alarm-action "http://localhost:$CONSUMER_PORT/failure" \
125 --description "VM failure" \
127 --repeat-actions False \
128 --severity "moderate" \
129 --event-type compute.instance.update \
130 -q "traits.state=string::error; traits.instance_id=string::$vm_id"
# Launch monitor.py in the background unless a process whose command line
# contains "python monitor.py" already exists. All monitor output (stdout and
# stderr) is collected in monitor.log.
if pgrep -f "python monitor.py" ; then
    return 0
fi
sudo python monitor.py \
    "$COMPUTE_HOST" \
    "$COMPUTE_IP" \
    "http://127.0.0.1:$INSPECTOR_PORT/events" \
    > monitor.log 2>&1 &
# Terminate the monitor; nothing to do when no matching process is found.
# sudo is used for the kill because the monitor itself is started via sudo.
if ! pgrep -f "python monitor.py" ; then
    return 0
fi
# Unquoted on purpose: pgrep may report several PIDs, one per line.
sudo kill $(pgrep -f "python monitor.py")
# Launch inspector.py in the background on INSPECTOR_PORT unless a process
# whose command line contains "python inspector.py" already exists. Its
# stdout and stderr are collected in inspector.log.
if pgrep -f "python inspector.py" ; then
    return 0
fi
python inspector.py "$INSPECTOR_PORT" \
    > inspector.log 2>&1 &
# Terminate the inspector; nothing to do when no matching process is found.
if ! pgrep -f "python inspector.py" ; then
    return 0
fi
# Unquoted on purpose: pgrep may report several PIDs, one per line.
kill $(pgrep -f "python inspector.py")
# Launch consumer.py in the background on CONSUMER_PORT unless a process
# whose command line contains "python consumer.py" already exists. Its
# stdout and stderr are collected in consumer.log.
if pgrep -f "python consumer.py" ; then
    return 0
fi
python consumer.py "$CONSUMER_PORT" \
    > consumer.log 2>&1 &
# Terminate the consumer; nothing to do when no matching process is found.
if ! pgrep -f "python consumer.py" ; then
    return 0
fi
# Unquoted on purpose: pgrep may report several PIDs, one per line.
kill $(pgrep -f "python consumer.py")
167 wait_for_vm_launch() {
168 echo "waiting for vm launch..."
170 while [[ ${count} -lt 60 ]]
172 state=$(nova list | grep " $VM_NAME " | awk '{print $6}')
173 [[ "$state" == "ACTIVE" ]] && return 0
174 [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
178 echo "ERROR: time out while waiting for vm launch"
183 echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
184 cat > disable_network.sh << 'END_TXT'
186 dev=$(sudo ip route | awk '/^default/{print $5}')
188 sudo ip link set $dev down
190 sudo ip link set $dev up
193 chmod +x disable_network.sh
194 scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
195 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
#######################################
# Print the difference between the consumer's notification timestamp and the
# monitor's detection timestamp, followed by " OK" when that difference is
# strictly between 0 and 1 and by " NG" otherwise.
# Globals:  detected, notified (written)
# Inputs:   monitor.log and consumer.log in the current directory
# Outputs:  "<difference> OK" or "<difference> NG" on stdout
#######################################
calculate_notification_time() {
    # The timestamp is the fifth whitespace-separated field of the matching
    # log line in each file.
    detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}')
    notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}')
    printf '%s\n' "$notified $detected" \
        | awk '{
            delay = $1 - $2
            verdict = (delay > 0 && delay < 1) ? "OK" : "NG"
            print delay " " verdict
        }'
}
205 check_host_status_down() {
207 # Switching to test user
208 export OS_USERNAME="$TEST_USER"
209 export OS_PASSWORD="$TEST_PW"
210 export OS_TENANT_NAME="$TEST_PROJECT"
212 host_status_line=$(nova show $VM_NAME | grep "host_status")
214 echo "ERROR: host_status not configured for owner in Nova policy.json"
217 host_status=$(echo $host_status_line | awk '{print $4}')
218 [[ "$host_status" == "DOWN" ]] && {
219 echo "$VM_NAME showing host_status: $host_status"
221 echo "ERROR: host_status not reported by: nova show $VM_NAME"
# Clear the forced-down flag that the test set on the compute host.
python ./nova_force_down.py "$COMPUTE_HOST" --unset

# Delete the test VM if it is still listed.
if nova list | grep -q " $VM_NAME " ; then
    nova delete "$VM_NAME"
fi

# Delete the test alarm, if any. The ID is the second whitespace-separated
# field of the matching row of the alarm list.
alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
if [ -n "$alarm_id" ] ; then
    ceilometer alarm-delete "$alarm_id"
fi

# Delete the test image, if any; the ID is extracted the same way.
image_id=$(glance image-list | grep " $IMAGE_NAME " | awk '{print $2}')
if [ -n "$image_id" ] ; then
    glance image-delete "$image_id"
fi

# Remove the test identity: first the role assignment, then the project,
# then the user.
openstack role remove "$TEST_ROLE" \
    --user "$TEST_USER" \
    --project "$TEST_PROJECT"
openstack project delete "$TEST_PROJECT"
openstack user delete "$TEST_USER"
# TODO: add a host status check via the nova admin API
249 echo "waiting disabled compute host back to be enabled..."
251 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
252 "[ -e disable_network.log ] && cat disable_network.log"
256 echo "Note: doctor/tests/run.sh has been executed."
262 echo "preparing VM image..."
266 echo "starting doctor sample components..."
271 echo "creating test user..."
274 echo "creating VM and alarm..."
280 echo "injecting host failure..."
284 check_host_status_down
285 calculate_notification_time