2 ##############################################################################
3 # Copyright (c) 2016 NEC Corporation and others.
5 # All rights reserved. This program and the accompanying materials
6 # are made available under the terms of the Apache License, Version 2.0
7 # which accompanies this distribution, and is available at
8 # http://www.apache.org/licenses/LICENSE-2.0
9 ##############################################################################
# Trace every command unless CI_DEBUG is set to something other than "true".
[[ "${CI_DEBUG:-true}" == "true" ]] && set -x

# Test image and alarm settings.
IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
# NOTE(review): IMAGE_NAME has to be set before this line runs; where that
# happens is not visible from here -- confirm.
IMAGE_FILE="${IMAGE_NAME}.img"
ALARM_NAME=doctor_alarm1

# Deployment settings. Each value below that uses ${VAR:-default} can be
# overridden from the environment.
SUPPORTED_INSTALLER_TYPES="apex local"
INSTALLER_TYPE=${INSTALLER_TYPE:-apex}
INSTALLER_IP=${INSTALLER_IP:-none}
COMPUTE_HOST=${COMPUTE_HOST:-overcloud-novacompute-0}
COMPUTE_IP=${COMPUTE_IP:-none}
COMPUTE_USER=${COMPUTE_USER:-heat-admin}
# Several ssh options kept in one string; expanded unquoted where used.
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

#######################################
# Tell whether an installer type is one of SUPPORTED_INSTALLER_TYPES.
# The comparison is against each whole word of the list, so a fragment
# such as "ape" or an empty string is rejected.
# Globals:
#   SUPPORTED_INSTALLER_TYPES (read)
# Arguments:
#   $1 - installer type to check
# Returns:
#   0 when supported, 1 otherwise
#######################################
is_supported_installer_type() {
    local candidate=$1 supported
    # Unquoted on purpose: the list is split into its words.
    for supported in $SUPPORTED_INSTALLER_TYPES; do
        [[ "$candidate" == "$supported" ]] && return 0
    done
    return 1
}

# Refuse to go on with an installer type this script does not know.
if ! is_supported_installer_type "$INSTALLER_TYPE"; then
    echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
    exit 1
fi
#######################################
# Work out how to reach the compute host over SSH and check that it
# answers a ping.
# Globals:
#   INSTALLER_TYPE, COMPUTE_HOST, ssh_opts (read)
#   INSTALLER_IP, COMPUTE_IP (read; looked up when still "none")
#   instack_mac, ssh_opts_cpu (written)
# Outputs:
#   Error and information messages on stdout; writes the file instack_key
#   in the current directory for the "apex" installer type.
# Returns:
#   0 when the compute host answered the ping, 1 on any failure
#######################################
prepare_compute_ssh() {
    # ssh_opts / ssh_opts_cpu carry several options in one string and are
    # expanded unquoted on purpose so that they split into words.
    ssh_opts_cpu="$ssh_opts"

    if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
        if [[ "$INSTALLER_IP" == "none" ]] ; then
            # MAC of the instack domain's "default" interface, then the
            # address the ARP table lists for that MAC.
            instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
            if [[ -z "$instack_mac" ]] ; then
                echo "ERROR: Could not find the MAC address of instack."
                return 1
            fi
            INSTALLER_IP=$(/usr/sbin/arp -e | grep -- "$instack_mac" | awk '{print $1}')
            if [[ -z "$INSTALLER_IP" ]] ; then
                echo "ERROR: Could not find the IP address of instack."
                return 1
            fi
        fi

        if [[ "$COMPUTE_IP" == "none" ]] ; then
            # \$5 is escaped so that awk on the installer node sees $5.
            # NOTE(review): the remote shell may have to load OpenStack
            # credentials before nova can be used -- confirm.
            COMPUTE_IP=$(sudo ssh $ssh_opts "$INSTALLER_IP" \
                         "nova show $COMPUTE_HOST | awk '/ ctlplane network /{print \$5}'")
        fi

        # get ssh key from installer node
        sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
        sudo chown "$(whoami)":"$(whoami)" instack_key
        # ssh refuses private keys that other users can read.
        chmod 600 instack_key
        ssh_opts_cpu+=" -i instack_key"
    elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
        if [[ "$COMPUTE_IP" == "none" ]] ; then
            COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
            if [[ -z "$COMPUTE_IP" ]]; then
                echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
                return 1
            fi
        fi

        echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
    fi

    # Whatever branch ran, an address is needed from here on.
    if [[ -z "$COMPUTE_IP" || "$COMPUTE_IP" == "none" ]] ; then
        echo "ERROR: Could not determine the IP address of $COMPUTE_HOST."
        return 1
    fi

    # verify connectivity to target compute host
    if ! ping -c 1 "$COMPUTE_IP" ; then
        echo "ERROR: can not ping compute host $COMPUTE_HOST ($COMPUTE_IP)"
        return 1
    fi
}
# Nothing to do when the image file is already present.
[ -e "$IMAGE_FILE" ] && return 0
# -O names the file the download is written to. Lower-case -o would only
# send wget's log messages there and store the image under its remote name.
wget "$IMAGE_URL" -O "$IMAGE_FILE"
# Nothing to do when an image called IMAGE_NAME is already registered.
glance image-list | grep -q " $IMAGE_NAME " && return 0
# Register the image and upload the local file as its data. The command
# ends on the --file line; no continuation is left dangling.
# NOTE(review): the image may also have to be made visible to the test
# tenant; the option for that depends on the glance API version -- confirm.
glance image-create --name "$IMAGE_NAME" \
                    --disk-format "$IMAGE_FORMAT" \
                    --container-format bare \
                    --file "$IMAGE_FILE"
# Create the test user, the test tenant and the role assignment, each only
# when it is not listed yet. The names are matched with a space on either
# side so that another name merely containing the wanted one does not count.
if ! keystone user-list | grep -q " $TEST_USER "; then
    keystone user-create --name "$TEST_USER" --pass "$TEST_PW"
fi
if ! keystone tenant-list | grep -q " $TEST_TENANT "; then
    keystone tenant-create --name "$TEST_TENANT"
fi
if ! keystone user-role-list --user "$TEST_USER" --tenant "$TEST_TENANT" \
        | grep -q " $TEST_ROLE "; then
    keystone user-role-add --user "$TEST_USER" --role "$TEST_ROLE" \
                           --tenant "$TEST_TENANT"
fi
# Nothing to do when a server called VM_NAME already exists.
nova list | grep -q " $VM_NAME " && return 0

# test VM done with test user, so can test non-admin
# The exports live in a subshell so that the commands that follow this
# block keep running with the credentials that were in effect before.
(
    export OS_USERNAME="$TEST_USER"
    export OS_PASSWORD="$TEST_PW"
    export OS_TENANT_NAME="$TEST_TENANT"
    # The server name is the last argument; the command ends there.
    nova boot --flavor "$VM_FLAVOR" \
              --image "$IMAGE_NAME" \
              "$VM_NAME"
)
# Create the event alarm for the test server unless an alarm called
# ALARM_NAME is already listed.
if ceilometer alarm-list | grep -q " $ALARM_NAME "; then
    return 0
fi

# Second column of the matching "nova list" row.
vm_id=$(nova list | grep " $VM_NAME " | awk '{print $2}')

# The alarm calls the consumer's /failure URL when a
# compute.instance.update event reports the error state for this server.
alarm_args=(
    --name "$ALARM_NAME"
    --alarm-action "http://localhost:$CONSUMER_PORT/failure"
    --description "VM failure"
    --repeat-actions False
    --severity "moderate"
    --event-type compute.instance.update
    -q "traits.state=string::error; traits.instance_id=string::$vm_id"
)
ceilometer alarm-event-create "${alarm_args[@]}"
# Leave an already running monitor alone.
if pgrep -f "python monitor.py"; then
    return 0
fi
# Start the monitor in the background; stdout and stderr go to monitor.log.
sudo python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" "http://127.0.0.1:$INSPECTOR_PORT/events" \
    > monitor.log 2>&1 &
# Nothing to stop when no monitor process is found.
if ! pgrep -f "python monitor.py"; then
    return 0
fi
# Unquoted on purpose: every PID printed by pgrep becomes its own argument.
# sudo is used because the monitor is started with sudo.
sudo kill $(pgrep -f "python monitor.py")
# Leave an already running inspector alone.
if pgrep -f "python inspector.py"; then
    return 0
fi
# Start the inspector in the background; stdout and stderr go to
# inspector.log.
python inspector.py "$INSPECTOR_PORT" \
    > inspector.log 2>&1 &
# Nothing to stop when no inspector process is found.
if ! pgrep -f "python inspector.py"; then
    return 0
fi
# Unquoted on purpose: every PID printed by pgrep becomes its own argument.
kill $(pgrep -f "python inspector.py")
# Leave an already running consumer alone.
if pgrep -f "python consumer.py"; then
    return 0
fi
# Start the consumer in the background; stdout and stderr go to
# consumer.log.
python consumer.py "$CONSUMER_PORT" \
    > consumer.log 2>&1 &
# Nothing to stop when no consumer process is found.
if ! pgrep -f "python consumer.py"; then
    return 0
fi
# Unquoted on purpose: every PID printed by pgrep becomes its own argument.
kill $(pgrep -f "python consumer.py")
#######################################
# Poll "nova list" once a second until the test server is ACTIVE.
# Globals:
#   VM_NAME (read), state (written)
# Arguments:
#   $1 - optional: seconds to keep waiting, default 60
#        NOTE(review): the default was picked for this rewrite -- adjust
#        to whatever the deployment needs.
# Outputs:
#   Progress and error messages on stdout
# Returns:
#   0 once the server is ACTIVE, 1 when the time is up
#######################################
wait_for_vm_launch() {
    local timeout=${1:-60}
    local elapsed=0

    echo "waiting for vm launch..."
    while true; do
        # Sixth whitespace-separated field of the matching table row.
        state=$(nova list | grep " $VM_NAME " | awk '{print $6}')
        [[ "$state" == "ACTIVE" ]] && return 0
        # The state is checked at least once, even with a timeout of 0.
        (( elapsed >= timeout )) && break
        sleep 1
        elapsed=$(( elapsed + 1 ))
    done

    echo "ERROR: time out while waiting for vm launch"
    return 1
}
echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
# The delimiter is quoted, so the lines up to END_TXT are written out
# verbatim and $(...) / $dev are only expanded when the generated script
# runs on the compute host. That script takes the interface of the default
# route down, waits the three minutes announced above and brings it up
# again.
cat > disable_network.sh << 'END_TXT'
#!/bin/bash
dev=$(sudo ip route | awk '/^default/{print $5}')
sudo ip link set $dev down
sleep 180
sudo ip link set $dev up
END_TXT
chmod +x disable_network.sh
# ssh_opts_cpu holds several options in one string; unquoted on purpose.
scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
# nohup and & let the script keep running after this ssh session ends,
# which it will once the link goes down.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
#######################################
# Print the time between failure detection and consumer notification.
# Reads the fifth field of the first "doctor monitor detected at" line in
# monitor.log and of the first "doctor consumer notified at" line in
# consumer.log. Only the first match of each is used, so repeated log
# lines cannot turn the values into multi-line strings.
# Globals:
#   detected, notified (written)
# Outputs:
#   "<difference> OK" when the difference is above 0 and below 1,
#   "<difference> NG" otherwise (a missing value counts as 0).
#######################################
calculate_notification_time() {
    detected=$(awk '/doctor monitor detected at/ {print $5; exit}' monitor.log)
    notified=$(awk '/doctor consumer notified at/ {print $5; exit}' consumer.log)
    echo "$notified $detected" | \
        awk '{d = $1 - $2; if (d < 1 && d > 0) print d " OK"; else print d " NG"}'
}
#######################################
# Check, as the test user, that "nova show" reports host_status DOWN for
# the test server.
# Globals:
#   TEST_USER, TEST_PW, TEST_TENANT, VM_NAME (read)
# Outputs:
#   One line on stdout: the reported status, or an ERROR message
# Returns:
#   0 when host_status is DOWN, 1 otherwise
#######################################
check_host_status_down() {
    # Everything runs in a subshell so that the switch to the test user's
    # credentials ends with this function.
    (
        # Switching to test user
        export OS_USERNAME="$TEST_USER"
        export OS_PASSWORD="$TEST_PW"
        export OS_TENANT_NAME="$TEST_TENANT"

        host_status_line=$(nova show "$VM_NAME" | grep "host_status")
        # No host_status row at all in the output.
        if [[ -z "$host_status_line" ]]; then
            echo "ERROR: host_status not configured for owner in Nova policy.json"
            exit 1
        fi

        # Fourth whitespace-separated field of the row.
        host_status=$(awk '{print $4}' <<< "$host_status_line")
        if [[ "$host_status" == "DOWN" ]]; then
            echo "$VM_NAME showing host_status: $host_status"
            exit 0
        fi

        echo "ERROR: host_status not reported by: nova show $VM_NAME"
        exit 1
    )
}
# Run nova_force_down.py with --unset for the compute host.
python ./nova_force_down.py "$COMPUTE_HOST" --unset

# Remove the test server.
nova delete "$VM_NAME"

# Remove the alarm and the image, but only when they are listed.
alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
if [[ -n "$alarm_id" ]]; then
    ceilometer alarm-delete "$alarm_id"
fi
image_id=$(glance image-list | grep " $IMAGE_NAME " | awk '{print $2}')
if [[ -n "$image_id" ]]; then
    glance image-delete "$image_id"
fi

# Remove the role assignment, the tenant and the user.
keystone user-role-remove --user "$TEST_USER" --role "$TEST_ROLE" \
                          --tenant "$TEST_TENANT"
# tenant-delete is the keystone sub-command that removes a tenant; it takes
# the tenant as its argument.
keystone tenant-delete "$TEST_TENANT"
keystone user-delete "$TEST_USER"

#TODO: add host status check via nova admin api
echo "waiting disabled compute host back to be enabled..."
# NOTE(review): nothing here waits for the host to be reachable again
# before the ssh call -- confirm that a wait happens elsewhere.
# ssh_opts_cpu holds several options in one string; unquoted on purpose.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
    "[ -e disable_network.log ] && cat disable_network.log"

echo "Note: doctor/tests/run.sh has been executed."
# Progress messages of the main sequence, followed by the two checks that
# evaluate the run.
printf '%s\n' "preparing VM image..."
printf '%s\n' "starting doctor sample components..."
printf '%s\n' "creating test user..."
printf '%s\n' "creating VM and alarm..."
printf '%s\n' "injecting host failure..."

# Report the host status, then the notification time.
check_host_status_down
calculate_notification_time