2 ##############################################################################
3 # Copyright (c) 2016 NEC Corporation and others.
5 # All rights reserved. This program and the accompanying materials
6 # are made available under the terms of the Apache License, Version 2.0
7 # which accompanies this distribution, and is available at
8 # http://www.apache.org/licenses/LICENSE-2.0
9 ##############################################################################
# Command tracing is switched on unless CI_DEBUG is set to a non-empty value
# other than "true".
11 [[ "${CI_DEBUG:-true}" == "true" ]] && set -x
# URL of the CirrOS 0.3.0 x86_64 disk image.
13 IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
# Image file name derived from IMAGE_NAME; IMAGE_NAME is not assigned in the
# lines visible here.
15 IMAGE_FILE="${IMAGE_NAME}.img"
# Name given to the alarm.
19 ALARM_NAME=doctor_alarm1
25 #TODO: change back to `_member_` when JIRA DOCTOR-55 is done
# Installer and compute-host settings. Every value except the list of
# supported installer types can be overridden from the environment.
SUPPORTED_INSTALLER_TYPES="apex local"
INSTALLER_TYPE=${INSTALLER_TYPE:-apex}
INSTALLER_IP=${INSTALLER_IP:-none}
COMPUTE_HOST=${COMPUTE_HOST:-overcloud-novacompute-0}
COMPUTE_IP=${COMPUTE_IP:-none}
COMPUTE_USER=${COMPUTE_USER:-heat-admin}
# Kept as one string on purpose: it is expanded unquoted so that it splits
# into separate ssh/scp options.
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

#######################################
# Tell whether an installer type is one of the supported ones.
# Whole words are compared, so a fragment such as "ape" or "x l" is rejected
# (a regex/substring match against the list would accept both).
# Globals:   SUPPORTED_INSTALLER_TYPES (read)
# Arguments: $1 - installer type to check
# Returns:   0 if the type is supported, 1 otherwise
#######################################
is_supported_installer_type() {
    local wanted=$1
    local -a known=()
    local entry
    read -r -a known <<< "$SUPPORTED_INSTALLER_TYPES"
    for entry in "${known[@]}" ; do
        [[ "$entry" == "$wanted" ]] && return 0
    done
    return 1
}

# Refuse to continue with an installer type this script does not handle.
if ! is_supported_installer_type "$INSTALLER_TYPE" ; then
    echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
    exit 1
fi
#######################################
# Work out how to reach the compute host over ssh and check it is reachable.
# Globals:
#   INSTALLER_TYPE, COMPUTE_HOST, ssh_opts (read)
#   INSTALLER_IP, COMPUTE_IP (read; filled in when they are "none")
#   instack_mac, ssh_opts_cpu (written)
# Outputs:
#   Progress and error messages on stdout; for "apex" the installer's
#   private key is copied to ./instack_key.
# Returns:
#   0 on success; exits the script with status 1 when the compute host
#   cannot be resolved or does not answer a ping.
#######################################
prepare_compute_ssh() {
    ssh_opts_cpu="$ssh_opts"

    if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
        if [[ "$INSTALLER_IP" == "none" ]] ; then
            # Find the installer VM's address through its MAC in the ARP table.
            instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
            INSTALLER_IP=$(/usr/sbin/arp -e | grep -- "${instack_mac}" | awk '{print $1}')
        fi

        if [[ "$COMPUTE_IP" == "none" ]] ; then
            # NOTE(review): the line that opens this quoted remote command is
            # missing from the file as reviewed. If the installer node needs
            # credentials loaded before `openstack` works, add that at the
            # start of the remote command - confirm.
            # $ssh_opts stays unquoted so it splits into separate options.
            COMPUTE_IP=$(sudo ssh $ssh_opts "$INSTALLER_IP" \
                         "openstack server show $COMPUTE_HOST \
                          | awk '/ ctlplane network /{print \$5}'")
        fi

        # get ssh key from installer node
        sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
        sudo chown "$(whoami):$(whoami)" instack_key
        # ssh ignores private key files that other users can access.
        chmod 400 instack_key
        ssh_opts_cpu+=" -i instack_key"
    elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
        if [[ "$COMPUTE_IP" == "none" ]] ; then
            COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
            if [[ -z "$COMPUTE_IP" ]]; then
                echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
                # Nothing below can work without an address.
                exit 1
            fi
        fi

        echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
    fi

    # verify connectivity to target compute host
    if ! ping -c 1 "$COMPUTE_IP" ; then
        echo "ERROR: can not ping compute host $COMPUTE_HOST ($COMPUTE_IP)"
        exit 1
    fi
}
# Nothing to do when the image file is already present.
[ -e "$IMAGE_FILE" ] && return 0
# -O names the file the download is written to; lowercase -o would only send
# wget's log messages there and leave the image under its remote name.
wget "$IMAGE_URL" -O "$IMAGE_FILE"
# Return early when an image named IMAGE_NAME already shows up in the listing.
84 openstack image list | grep -q " $IMAGE_NAME " && return 0
# NOTE(review): this command ends in a line continuation and its remaining
# arguments are not visible here - confirm them against the complete file.
85 openstack image create "$IMAGE_NAME" \
87 --disk-format "$IMAGE_FORMAT" \
88 --container-format bare \
# Create the test user, the test project and the role assignment, each one
# only when the matching listing does not already contain it. Every `|| {`
# group is closed so the statements after it are not swallowed by the group.
openstack user list | grep -q " $DOCTOR_USER " || {
    openstack user create "$DOCTOR_USER" --password "$DOCTOR_PW"
}
openstack project list | grep -q " $DOCTOR_PROJECT " || {
    openstack project create "$DOCTOR_PROJECT"
}
openstack user role list "$DOCTOR_USER" --project "$DOCTOR_PROJECT" \
    | grep -q " $DOCTOR_ROLE " || {
    openstack role add "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
                       --project "$DOCTOR_PROJECT"
}
#######################################
# Point the OpenStack client environment at the test user and project.
# Globals:
#   DOCTOR_USER, DOCTOR_PW, DOCTOR_PROJECT (read)
#   OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_TENANT_NAME (exported)
# Arguments: none
#######################################
change_to_doctor_user() {
    export OS_USERNAME="$DOCTOR_USER"
    export OS_PASSWORD="$DOCTOR_PW"
    # Project and tenant name are both set to the same project.
    export OS_PROJECT_NAME="$DOCTOR_PROJECT"
    export OS_TENANT_NAME="$DOCTOR_PROJECT"
}
115 # test VM done with test user, so can test non-admin
116 change_to_doctor_user
# Return early when a server named VM_NAME already shows up in the listing.
117 openstack server list | grep -q " $VM_NAME " && return 0
# NOTE(review): this command ends in a line continuation and its remaining
# arguments are not visible here - confirm them against the complete file.
118 openstack server create --flavor "$VM_FLAVOR" \
119 --image "$IMAGE_NAME" \
# The lookups and the alarm creation run with the test user's credentials.
change_to_doctor_user
# An alarm named ALARM_NAME that is already listed is left untouched.
if ceilometer alarm-list | grep -q " $ALARM_NAME " ; then
    return 0
fi
# Second column of the server listing row that matches VM_NAME.
vm_id=$(openstack server list | grep " $VM_NAME " | awk '{print $2}')
# Event alarm on compute.instance.update events whose state trait is "error"
# for this instance; its action posts to the consumer's /failure URL.
ceilometer alarm-event-create \
    --name "$ALARM_NAME" \
    --alarm-action "http://localhost:$CONSUMER_PORT/failure" \
    --description "VM failure" \
    --repeat-actions False \
    --severity "moderate" \
    --event-type compute.instance.update \
    -q "traits.state=string::error; traits.instance_id=string::$vm_id"
# Leave an already running monitor alone.
if pgrep -f "python monitor.py" ; then
    return 0
fi
# Run the monitor in the background with all of its output in monitor.log;
# it is handed the compute host, its address and the inspector's events URL.
sudo python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" \
    "http://127.0.0.1:$INSPECTOR_PORT/events" \
    > monitor.log 2>&1 &
# Nothing to stop when no monitor process is found.
if ! pgrep -f "python monitor.py" ; then
    return 0
fi
# The substitution is left unquoted so that each PID becomes its own argument.
sudo kill $(pgrep -f "python monitor.py")
# Leave an already running inspector alone.
if pgrep -f "python inspector.py" ; then
    return 0
fi
# Run the inspector in the background with all of its output in inspector.log.
python inspector.py "$INSPECTOR_PORT" \
    > inspector.log 2>&1 &
# Nothing to stop when no inspector process is found.
if ! pgrep -f "python inspector.py" ; then
    return 0
fi
# The substitution is left unquoted so that each PID becomes its own argument.
kill $(pgrep -f "python inspector.py")
# Leave an already running consumer alone.
if pgrep -f "python consumer.py" ; then
    return 0
fi
# Run the consumer in the background with all of its output in consumer.log.
python consumer.py "$CONSUMER_PORT" \
    > consumer.log 2>&1 &
# Nothing to stop when no consumer process is found.
if ! pgrep -f "python consumer.py" ; then
    return 0
fi
# The substitution is left unquoted so that each PID becomes its own argument.
kill $(pgrep -f "python consumer.py")
#######################################
# Poll the server listing until the test VM is ACTIVE.
# Globals:
#   VM_NAME (read); state (written)
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   0 as soon as the VM is ACTIVE; exits the script with status 1 when the
#   VM goes to ERROR or is still not ACTIVE after 60 polls.
#######################################
wait_for_vm_launch() {
    echo "waiting for vm launch..."

    # get VM state as test user
    change_to_doctor_user

    # The counter has to start at zero and advance on every pass, otherwise
    # the loop condition never changes.
    local count=0
    while [[ ${count} -lt 60 ]]
    do
        # Sixth whitespace-separated field of the row that matches VM_NAME.
        state=$(openstack server list | grep " $VM_NAME " | awk '{print $6}')
        [[ "$state" == "ACTIVE" ]] && return 0
        [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
        count=$((count + 1))
        # NOTE(review): the pause between polls is not visible in the file as
        # reviewed; one second gives roughly a one-minute timeout - confirm.
        sleep 1
    done
    echo "ERROR: time out while waiting for vm launch"
    exit 1
}
echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
# Generate the script that runs on the compute host. The quoted delimiter
# keeps $dev and $(...) literal so they are evaluated remotely, and the
# here-document is terminated so the commands after it are executed here
# instead of being written into the script.
# The script waits a moment before cutting the link so the ssh session that
# launches it can return, then keeps the link down for the announced 3 minutes.
# NOTE(review): the shebang and both sleep lines are not visible in the file
# as reviewed; the 180 s value follows the "3 mins" message above - confirm.
cat > disable_network.sh << 'END_TXT'
#!/bin/bash -x
dev=$(sudo ip route | awk '/^default/{print $5}')
sleep 1
sudo ip link set $dev down
sleep 180
sudo ip link set $dev up
END_TXT
chmod +x disable_network.sh
# $ssh_opts_cpu stays unquoted so it splits into separate options.
scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
# Start the script detached on the compute host, logging to disable_network.log.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
#######################################
# Report the delay between fault detection and consumer notification.
# Reads the fifth field of the "doctor monitor detected at" line in
# monitor.log and of the "doctor consumer notified at" line in consumer.log.
# Globals:
#   detected, notified (written)
# Outputs:
#   "<delay> OK" when 0 < delay < 1, otherwise "<delay> NG"; an ERROR line
#   when either log has no matching entry.
# Returns:
#   0 when a delay was printed, 1 when a log entry is missing.
#######################################
calculate_notification_time() {
    detected=$(awk '/doctor monitor detected at/{print $5}' monitor.log)
    notified=$(awk '/doctor consumer notified at/{print $5}' consumer.log)

    # An empty value would be treated as 0 by awk and yield a meaningless
    # delay, so report the missing entry instead.
    if [[ -z "$detected" || -z "$notified" ]] ; then
        echo "ERROR: detection or notification time not found in monitor.log/consumer.log"
        return 1
    fi

    echo "$notified $detected" | \
        awk '{d = $1 - $2; if (d < 1 && d > 0) print d " OK"; else print d " NG"}'
}
#######################################
# Check the host_status reported for the test VM against an expected value.
# Globals:
#   VM_NAME (read); host_status_line, host_status (written)
# Arguments:
#   $1 - expected host_status (the script calls this with "UP" and "DOWN")
# Outputs:
#   The observed status, or an ERROR line, on stdout.
# Returns:
#   0 when the status matches; exits the script with status 1 otherwise.
#######################################
check_host_status() {
    local expect_state=$1

    # The query is made with the test user's credentials.
    change_to_doctor_user

    # The pipeline's status is grep's: non-zero means there is no
    # host_status row at all.
    if ! host_status_line=$(openstack server show "$VM_NAME" | grep "host_status") ; then
        echo "ERROR: host_status not configured for owner in Nova policy.json"
        exit 1
    fi

    # Fourth whitespace-separated field of the host_status row.
    host_status=$(awk 'NR == 1 {print $4}' <<< "$host_status_line")
    if [ -z "$host_status" ] ; then
        echo "ERROR: host_status not reported by: nova show $VM_NAME"
        exit 1
    elif [[ "$host_status" != "$expect_state" ]] ; then
        echo "ERROR: host_status:$host_status not equal to expect_state: $expect_state"
        exit 1
    fi
    echo "$VM_NAME showing host_status: $host_status"
}
# Teardown steps. NOTE(review): lines between some of these statements are
# not visible here (for example any credential switch or waiting period) -
# confirm the full sequence against the complete file.
# Runs nova_force_down.py with --unset for the compute host; presumably this
# clears a forced-down flag - verify against that script.
253 python ./nova_force_down.py "$COMPUTE_HOST" --unset
# Switch to the test user's credentials before removing the VM and the alarm.
256 change_to_doctor_user
# Delete the VM only when it is listed.
257 openstack server list | grep -q " $VM_NAME " && openstack server delete "$VM_NAME"
# Second column of the alarm listing row that matches ALARM_NAME.
259 alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
261 [ -n "$alarm_id" ] && ceilometer alarm-delete "$alarm_id"
# Second column of the image listing row that matches IMAGE_NAME.
264 image_id=$(openstack image list | grep " $IMAGE_NAME " | awk '{print $2}')
266 [ -n "$image_id" ] && openstack image delete "$image_id"
# Remove the role assignment, then the test project and the test user.
267 openstack role remove "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
268 --project "$DOCTOR_PROJECT"
269 openstack project delete "$DOCTOR_PROJECT"
270 openstack user delete "$DOCTOR_USER"
272 echo "waiting disabled compute host back to be enabled..."
# Expect the VM's host_status to read "UP".
274 check_host_status "UP"
# Print disable_network.log from the compute host when that file exists there.
275 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
276 "[ -e disable_network.log ] && cat disable_network.log"
# Main flow: one progress message per stage. NOTE(review): the commands that
# carry out each stage are not visible between these messages - confirm
# against the complete file.
280 echo "Note: doctor/tests/run.sh has been executed."
286 echo "preparing VM image..."
290 echo "starting doctor sample components..."
295 echo "creating test user..."
298 echo "creating VM and alarm..."
304 echo "injecting host failure..."
# Expect the VM's host_status to read "DOWN", then print the delay between
# detection and notification.
308 check_host_status "DOWN"
309 calculate_notification_time