##############################################################################
# Copyright (c) 2016 NEC Corporation and others.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
# Trace every command unless CI_DEBUG is set to something other than "true".
[[ "${CI_DEBUG:-true}" == "true" ]] && set -x

# Test image location and the local file it is stored in.
IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
# NOTE(review): IMAGE_NAME is not assigned in the visible part of this file;
# it has to be set before this line -- confirm.
IMAGE_FILE="${IMAGE_NAME}.img"
ALARM_NAME=doctor_alarm1

# Deployment settings; every one except SUPPORTED_INSTALLER_TYPES and
# ssh_opts can be overridden from the environment.
SUPPORTED_INSTALLER_TYPES="apex local"
INSTALLER_TYPE=${INSTALLER_TYPE:-apex}
INSTALLER_IP=${INSTALLER_IP:-none}
COMPUTE_HOST=${COMPUTE_HOST:-overcloud-novacompute-0}
COMPUTE_IP=${COMPUTE_IP:-none}
COMPUTE_USER=${COMPUTE_USER:-heat-admin}
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

#######################################
# Check an installer type against SUPPORTED_INSTALLER_TYPES.
# Globals:   SUPPORTED_INSTALLER_TYPES (read)
# Arguments: $1 - installer type to check
# Returns:   0 if $1 is exactly one of the listed words, 1 otherwise
#######################################
is_supported_installer_type() {
    local candidate=$1
    # Empty values and values containing whitespace can never be a single
    # listed word; rejecting them keeps "apex local" from matching itself.
    [[ -n "$candidate" && "$candidate" != *[[:space:]]* ]] || return 1
    # Pad both sides with spaces so only whole words match; a plain
    # substring/regex test would also accept fragments such as "ape".
    [[ " $SUPPORTED_INSTALLER_TYPES " == *" $candidate "* ]]
}

if ! is_supported_installer_type "$INSTALLER_TYPE" ; then
    echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
    exit 1
fi
# Work out how to reach the compute host over ssh: fills in INSTALLER_IP and
# COMPUTE_IP when they are "none" and builds ssh_opts_cpu from ssh_opts.
# Globals read:    ssh_opts, INSTALLER_TYPE, COMPUTE_HOST
# Globals written: ssh_opts_cpu, instack_mac, INSTALLER_IP, COMPUTE_IP
# NOTE(review): no closing fi/} lines are visible for this function, and the
# quoted remote command inside the COMPUTE_IP substitution has no visible
# opening quote -- lines appear to be missing; confirm against the full file.
40 prepare_compute_ssh() {
41 ssh_opts_cpu="$ssh_opts"
# apex: addresses are discovered through the "instack" libvirt domain.
43 if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
44 if [[ "$INSTALLER_IP" == "none" ]] ; then
# Field 5 of the `virsh domiflist instack` row matching "default" is kept as
# instack_mac; the first column of the `arp -e` row containing it becomes
# INSTALLER_IP.
45 instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
46 INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
# Ask the installer node (over ssh) for field 5 of the " ctlplane network "
# row printed by `nova show $COMPUTE_HOST`.
49 if [[ "$COMPUTE_IP" == "none" ]] ; then
50 COMPUTE_IP=$(sudo ssh $ssh_opts $INSTALLER_IP \
52 nova show $COMPUTE_HOST \
53 | awk '/ ctlplane network /{print \$5}'")
56 # get ssh key from installer node
57 sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
58 sudo chown $(whoami):$(whoami) instack_key
# Connections to the compute host use the key that was just copied.
60 ssh_opts_cpu+=" -i instack_key"
# local: resolve COMPUTE_HOST with getent; no key is fetched in this branch.
61 elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
62 if [[ "$COMPUTE_IP" == "none" ]] ; then
63 COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
64 if [[ -z "$COMPUTE_IP" ]]; then
65 echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
70 echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
73 # verify connectivity to target compute host
74 ping -c 1 "$COMPUTE_IP"
#######################################
# Download the test image unless it is already present locally.
# Globals:   IMAGE_URL (read), IMAGE_FILE (read)
# Arguments: none
# Returns:   0 if IMAGE_FILE already exists or the download succeeds,
#            otherwise wget's exit status
#######################################
download_image() {
    [ -e "$IMAGE_FILE" ] && return 0

    local rc=0
    # -O (capital) names the output document. Lowercase -o only redirects
    # wget's log messages, which would leave a log in IMAGE_FILE and save the
    # image under its remote name.
    wget "$IMAGE_URL" -O "$IMAGE_FILE" || rc=$?
    if [ "$rc" -ne 0 ]; then
        # wget -O creates the target even when the transfer fails; remove it
        # so the existence check above does not accept a broken file next time.
        rm -f -- "$IMAGE_FILE"
    fi
    return "$rc"
}
# Create a Glance image named IMAGE_NAME (disk format IMAGE_FORMAT, container
# format "bare") unless `glance image-list` already shows that name.
# NOTE(review): the enclosing function header is not visible, and the
# image-create command ends on a line continuation whose following line is
# not visible either -- confirm against the full file.
83 glance image-list | grep -q " $IMAGE_NAME " && return 0
84 glance image-create --name "$IMAGE_NAME" \
86 --disk-format "$IMAGE_FORMAT" \
87 --container-format bare \
# Make sure the test identity exists: each step first greps the matching
# `openstack ... list` output and only creates the object when it is absent.
# NOTE(review): the enclosing function header and the closing "}" of each
# "|| {" group are not visible here -- confirm against the full file.
# 1) user DOCTOR_USER, created with password DOCTOR_PW
92 openstack user list | grep -q " $DOCTOR_USER " || {
93 openstack user create "$DOCTOR_USER" --password "$DOCTOR_PW"
# 2) project DOCTOR_PROJECT
95 openstack project list | grep -q " $DOCTOR_PROJECT " || {
96 openstack project create "$DOCTOR_PROJECT"
# 3) role DOCTOR_ROLE for that user on that project
98 openstack user role list "$DOCTOR_USER" --project "$DOCTOR_PROJECT" \
99 | grep -q " $DOCTOR_ROLE " || {
100 openstack role add "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
101 --project "$DOCTOR_PROJECT"
#######################################
# Point the OpenStack client environment at the doctor test identity.
# Globals:   DOCTOR_USER, DOCTOR_PW, DOCTOR_PROJECT (read)
#            OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_TENANT_NAME
#            (written and exported)
# Arguments: none
#######################################
change_to_doctor_user() {
    export OS_USERNAME="$DOCTOR_USER"
    export OS_PASSWORD="$DOCTOR_PW"
    # The project is exported under both variable names.
    export OS_PROJECT_NAME="$DOCTOR_PROJECT"
    export OS_TENANT_NAME="$DOCTOR_PROJECT"
}
# Boot the test VM as the doctor test user unless `nova list` already shows
# VM_NAME. The boot request uses flavor VM_FLAVOR and image IMAGE_NAME.
# NOTE(review): the enclosing function header is not visible, and the
# `nova boot` command ends on a line continuation whose following line(s)
# are not visible -- confirm against the full file.
114 # test VM done with test user, so can test non-admin
115 change_to_doctor_user
116 nova list | grep -q " $VM_NAME " && return 0
117 nova boot --flavor "$VM_FLAVOR" \
118 --image "$IMAGE_NAME" \
# Create the event alarm ALARM_NAME for the test VM unless
# `ceilometer alarm-list` already shows that name. The alarm action is the
# /failure URL on localhost:CONSUMER_PORT; the query matches
# compute.instance.update events with state "error" for the VM's id.
# NOTE(review): the enclosing function header and closing brace are not
# visible, and a continuation line may be missing between --description and
# --repeat-actions -- confirm against the full file.
127 # get vm_id as test user
128 change_to_doctor_user
129 ceilometer alarm-list | grep -q " $ALARM_NAME " && return 0
# Second column of the `nova list` row for VM_NAME is used as the instance id.
130 vm_id=$(nova list | grep " $VM_NAME " | awk '{print $2}')
131 ceilometer alarm-event-create --name "$ALARM_NAME" \
132 --alarm-action "http://localhost:$CONSUMER_PORT/failure" \
133 --description "VM failure" \
135 --repeat-actions False \
136 --severity "moderate" \
137 --event-type compute.instance.update \
138 -q "traits.state=string::error; traits.instance_id=string::$vm_id"
# Start monitor.py in the background (via sudo) unless a process matching
# "python monitor.py" is already running. It is given COMPUTE_HOST,
# COMPUTE_IP and the inspector's /events URL; output goes to monitor.log.
# NOTE(review): the enclosing function header/closer is not visible here.
144 pgrep -f "python monitor.py" && return 0
145 sudo python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" \
146 "http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
# Kill (via sudo) every process matching "python monitor.py"; return 0
# straight away when none is running.
# NOTE(review): the enclosing function header/closer is not visible here.
150 pgrep -f "python monitor.py" || return 0
151 sudo kill $(pgrep -f "python monitor.py")
# Start inspector.py on INSPECTOR_PORT in the background unless a process
# matching "python inspector.py" is already running; output goes to
# inspector.log.
# NOTE(review): the enclosing function header/closer is not visible here.
156 pgrep -f "python inspector.py" && return 0
157 python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 &
# Kill every process matching "python inspector.py"; return 0 straight away
# when none is running.
# NOTE(review): the enclosing function header/closer is not visible here.
161 pgrep -f "python inspector.py" || return 0
162 kill $(pgrep -f "python inspector.py")
# Start consumer.py on CONSUMER_PORT in the background unless a process
# matching "python consumer.py" is already running; output goes to
# consumer.log.
# NOTE(review): the enclosing function header/closer is not visible here.
167 pgrep -f "python consumer.py" && return 0
168 python consumer.py "$CONSUMER_PORT" > consumer.log 2>&1 &
# Kill every process matching "python consumer.py"; return 0 straight away
# when none is running.
# NOTE(review): the enclosing function header/closer is not visible here.
172 pgrep -f "python consumer.py" || return 0
173 kill $(pgrep -f "python consumer.py")
# Poll `nova list` as the doctor test user until VM_NAME is up.
# Returns 0 as soon as the state column reads ACTIVE; exits the script with
# status 1 when it reads ERROR. The loop condition allows iterations while
# count is below 60, after which a time-out error is printed.
# NOTE(review): the initialisation/increment of count, any delay between
# polls, the do/done keywords and the closing brace are not visible here --
# confirm against the full file.
177 wait_for_vm_launch() {
178 echo "waiting for vm launch..."
181 # get VM state as test user
182 change_to_doctor_user
185 while [[ ${count} -lt 60 ]]
# Column 6 of the `nova list` row for VM_NAME is taken as the VM state.
187 state=$(nova list | grep " $VM_NAME " | awk '{print $6}')
188 [[ "$state" == "ACTIVE" ]] && return 0
189 [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
193 echo "ERROR: time out while waiting for vm launch"
# Simulate a compute host failure by cutting its network.
# A helper script, disable_network.sh, is written locally from a quoted
# heredoc (so nothing in it is expanded here), made executable, copied to the
# compute host with scp and started there in the background with nohup,
# logging to disable_network.log. The helper looks up the device of the
# default route and sets that link down and later up again.
# Globals read: COMPUTE_HOST, COMPUTE_USER, COMPUTE_IP, ssh_opts_cpu
# NOTE(review): the printed message says "3 mins", but the delay between
# "down" and "up", the heredoc terminator and the enclosing function
# header/closer are not visible here -- confirm against the full file.
199 echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
200 cat > disable_network.sh << 'END_TXT'
202 dev=$(sudo ip route | awk '/^default/{print $5}')
204 sudo ip link set $dev down
206 sudo ip link set $dev up
209 chmod +x disable_network.sh
210 scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
211 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
#######################################
# Report the delay between fault detection and consumer notification.
# Takes field 5 of the first "doctor monitor detected at" line in monitor.log
# and of the first "doctor consumer notified at" line in consumer.log (both
# read from the current directory).
# Outputs:  "<delay> OK" on stdout when 0 < delay < 1, otherwise
#           "<delay> NG" (delay is in the unit the logs use, presumably
#           seconds -- confirm); an error on stderr when a timestamp is
#           missing.
# Returns:  0 after printing a result (OK or NG), 1 when either timestamp
#           cannot be found.
#######################################
calculate_notification_time() {
    local detected notified

    # Only the first matching line of each log is used, so repeated log
    # entries cannot produce extra result lines. A missing log file yields an
    # empty value rather than aborting.
    detected=$(awk '/doctor monitor detected at/ {print $5; exit}' \
        monitor.log 2>/dev/null) || detected=""
    notified=$(awk '/doctor consumer notified at/ {print $5; exit}' \
        consumer.log 2>/dev/null) || notified=""

    # With one value missing the subtraction below would silently treat it as
    # 0 and report a meaningless figure.
    if [[ -z "$detected" || -z "$notified" ]] ; then
        echo "ERROR: detection or notification timestamp not found in logs" >&2
        return 1
    fi

    echo "$notified $detected" | \
        awk '{d = $1 - $2; if (d < 1 && d > 0) print d " OK"; else print d " NG"}'
}
# As the doctor test user, read the "host_status" row of `nova show VM_NAME`
# and report whether its fourth whitespace-separated field is DOWN.
# NOTE(review): the conditions guarding the two ERROR messages, any exit or
# return statements and the closing braces are not visible here, so the exact
# control flow cannot be stated -- confirm against the full file.
221 check_host_status_down() {
223 change_to_doctor_user
225 host_status_line=$(nova show $VM_NAME | grep "host_status")
227 echo "ERROR: host_status not configured for owner in Nova policy.json"
# $host_status_line is expanded unquoted, so runs of whitespace collapse
# before awk picks field 4.
230 host_status=$(echo $host_status_line | awk '{print $4}')
231 [[ "$host_status" == "DOWN" ]] && {
232 echo "$VM_NAME showing host_status: $host_status"
234 echo "ERROR: host_status not reported by: nova show $VM_NAME"
# Tear-down steps: run nova_force_down.py with --unset for COMPUTE_HOST,
# delete the test VM, alarm and image when they are listed, remove the test
# role/project/user, then print disable_network.log from the compute host if
# it exists.
# NOTE(review): the enclosing function header, any credential switch between
# the steps and any wait before the final ssh are not visible here -- confirm
# against the full file.
# presumably clears the forced-down state set during the test; verify in
# nova_force_down.py
245 python ./nova_force_down.py "$COMPUTE_HOST" --unset
248 change_to_doctor_user
249 nova list | grep -q " $VM_NAME " && nova delete "$VM_NAME"
# Second column of the matching list row is used as the object id below.
251 alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
253 [ -n "$alarm_id" ] && ceilometer alarm-delete "$alarm_id"
256 image_id=$(glance image-list | grep " $IMAGE_NAME " | awk '{print $2}')
258 [ -n "$image_id" ] && glance image-delete "$image_id"
259 openstack role remove "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
260 --project "$DOCTOR_PROJECT"
261 openstack project delete "$DOCTOR_PROJECT"
262 openstack user delete "$DOCTOR_USER"
264 #TODO: add host status check via nova admin api
265 echo "waiting disabled compute host back to be enabled..."
267 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
268 "[ -e disable_network.log ] && cat disable_network.log"
272 echo "Note: doctor/tests/run.sh has been executed."
# Top-level test sequence: progress messages for each phase, followed by the
# host-status check and the notification-time report.
# NOTE(review): only the progress messages and the last two calls are visible
# here; the commands that carry out each announced phase are not shown --
# confirm against the full file.
278 echo "preparing VM image..."
282 echo "starting doctor sample components..."
287 echo "creating test user..."
290 echo "creating VM and alarm..."
296 echo "injecting host failure..."
300 check_host_status_down
301 calculate_notification_time