##############################################################################
# Copyright (c) 2016 NEC Corporation and others.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
# Trace every command unless CI_DEBUG is set to something other than "true".
[[ "${CI_DEBUG:-true}" == "true" ]] && set -x

# Location of the test image and the local file it is stored in.
IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
# NOTE(review): IMAGE_NAME is not assigned in the lines visible here; confirm
# it is set before this point, otherwise IMAGE_FILE becomes ".img".
IMAGE_FILE="${IMAGE_NAME}.img"
ALARM_NAME=doctor_alarm1
#TODO: change back to `_member_` when JIRA DOCTOR-55 is done

# Deployment settings; each may be overridden from the environment.
# INSTALLER_IP=none makes get_compute_host_info discover the address itself.
SUPPORTED_INSTALLER_TYPES="apex local"
INSTALLER_TYPE=${INSTALLER_TYPE:-apex}
INSTALLER_IP=${INSTALLER_IP:-none}
COMPUTE_USER=${COMPUTE_USER:-none}
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

# Succeeds only when $1 equals one whole word of SUPPORTED_INSTALLER_TYPES.
# A regex/substring test would also accept fragments such as "ape" or "".
is_supported_installer_type() {
    local candidate=$1 known
    # Unquoted on purpose: iterate over the space-separated list.
    for known in $SUPPORTED_INSTALLER_TYPES; do
        if [[ "$known" == "$candidate" ]] ; then
            return 0
        fi
    done
    return 1
}

if ! is_supported_installer_type "$INSTALLER_TYPE" ; then
    echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
    exit 1
fi
#######################################
# Work out which compute host runs the test VM and how to reach it.
# Globals:
#   VM_NAME, INSTALLER_TYPE, ssh_opts (read)
#   INSTALLER_IP, COMPUTE_USER (read; filled in when left at "none")
#   COMPUTE_HOST (exported), COMPUTE_IP (written)
# Exits with status 1 when the host cannot be resolved or does not answer ping.
#######################################
get_compute_host_info() {
    # get computer host info which VM boot in
    # Field 4 of the "OS-EXT-SRV-ATTR:host" row, cut at the first dot.
    COMPUTE_HOST=$(openstack server show "$VM_NAME" \
        | awk '/OS-EXT-SRV-ATTR:host/{ print $4 }' \
        | awk -F '.' '{print $1}')
    export COMPUTE_HOST

    if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
        # The script-level default is the literal "none", which ${VAR:-default}
        # treats as a real value, so the placeholder is checked explicitly.
        if [[ -z "$COMPUTE_USER" || "$COMPUTE_USER" == "none" ]] ; then
            COMPUTE_USER=heat-admin
        fi
        if [[ "$INSTALLER_IP" == "none" ]] ; then
            # Take column 5 of the instack domain's "default" interface row and
            # look the matching entry up in the ARP table.
            instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
            INSTALLER_IP=$(/usr/sbin/arp -e | grep "${instack_mac}" | awk '{print $1}')
        fi
        # NOTE(review): the first line of the remote command is not visible
        # here; confirm whether credentials have to be sourced on the installer
        # before `openstack` is called.
        # $ssh_opts stays unquoted so that it splits into separate options.
        COMPUTE_IP=$(sudo ssh $ssh_opts "$INSTALLER_IP" \
            "openstack server show $COMPUTE_HOST \
             | awk '/ ctlplane network /{print \$5}'")
    elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
        if [[ -z "$COMPUTE_USER" || "$COMPUTE_USER" == "none" ]] ; then
            COMPUTE_USER=$(whoami)
        fi
        COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
        if [[ -z "$COMPUTE_IP" ]]; then
            echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
            exit 1
        fi
    fi

    # verify connectivity to target compute host
    if ! ping -c 1 "$COMPUTE_IP" ; then
        echo "ERROR: can not ping to computer host"
        exit 1
    fi
}
#######################################
# Build the ssh options for the compute host and check that login works.
# Globals:
#   ssh_opts, INSTALLER_TYPE, INSTALLER_IP (read)
#   COMPUTE_HOST, COMPUTE_USER, COMPUTE_IP (read)
#   ssh_opts_cpu (written)
# Exits with status 1 when the ssh probe fails.
#######################################
prepare_compute_ssh() {
    ssh_opts_cpu="$ssh_opts"

    # get ssh key from installer node
    if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
        sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
        # The copy was made through sudo; hand the file to the invoking user.
        sudo chown "$(whoami)":"$(whoami)" instack_key
        # ssh refuses a private key that other users are able to read.
        chmod 400 instack_key
        # Only this branch fetches a key, so only this branch passes -i.
        ssh_opts_cpu+=" -i instack_key"
    elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
        echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
    fi

    # verify ssh to target compute host
    # $ssh_opts_cpu stays unquoted so that it splits into separate options.
    if ! ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit' ; then
        echo "ERROR: can not ssh to computer host"
        exit 1
    fi
}
# Fetch the test image unless a copy is already on disk.
[ -e "$IMAGE_FILE" ] && return 0
# -O names the downloaded document; lowercase -o only redirects wget's log
# messages into that file and leaves the image under its remote name.
# A failed transfer must not leave a partial file behind, or the existence
# check above would skip the download on the next run.
wget "$IMAGE_URL" -O "$IMAGE_FILE" || { rm -f -- "$IMAGE_FILE"; return 1; }
# Register the image once; a name match in the image list means it exists.
openstack image list | grep -q " $IMAGE_NAME " && return 0
# NOTE(review): the option lines around --disk-format/--container-format are
# not visible here. --file is passed on the assumption that the image stored
# in $IMAGE_FILE is the one to upload; confirm, and restore any other options
# (for example visibility) that belong to this command.
openstack image create "$IMAGE_NAME" \
    --disk-format "$IMAGE_FORMAT" \
    --container-format bare \
    --file "$IMAGE_FILE"
# Create the test user, its project and the role assignment; each step is
# skipped when the corresponding listing already contains the name.
if ! openstack user list | grep -q " $DOCTOR_USER " ; then
    openstack user create "$DOCTOR_USER" --password "$DOCTOR_PW"
fi
if ! openstack project list | grep -q " $DOCTOR_PROJECT " ; then
    openstack project create "$DOCTOR_PROJECT"
fi
if ! openstack user role list "$DOCTOR_USER" --project "$DOCTOR_PROJECT" \
        | grep -q " $DOCTOR_ROLE " ; then
    openstack role add "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
        --project "$DOCTOR_PROJECT"
fi
#######################################
# Point the OpenStack client environment at the test user.
# Globals:
#   DOCTOR_USER, DOCTOR_PW, DOCTOR_PROJECT (read)
#   OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_TENANT_NAME (exported)
#######################################
change_to_doctor_user() {
    export OS_USERNAME="$DOCTOR_USER"
    export OS_PASSWORD="$DOCTOR_PW"
    # Project name and tenant name are both set to the same project.
    export OS_PROJECT_NAME="$DOCTOR_PROJECT"
    export OS_TENANT_NAME="$DOCTOR_PROJECT"
}
# test VM done with test user, so can test non-admin
change_to_doctor_user
# Nothing to do when a server with this name is already listed.
openstack server list | grep -q " $VM_NAME " && return 0
# `openstack server create` needs the server name as its positional argument;
# the existence check above shows that name is $VM_NAME.
# NOTE(review): the trailing option lines of this command are not visible
# here; confirm nothing else (for example waiting for the build) is required.
openstack server create --flavor "$VM_FLAVOR" \
    --image "$IMAGE_NAME" \
    "$VM_NAME"
# Create the event alarm for the test VM unless an alarm with that name is
# already listed.
# get vm_id as test user
change_to_doctor_user
if ceilometer alarm-list | grep -q " $ALARM_NAME " ; then
    return 0
fi
# Column 2 of the server-list row that carries the VM name.
vm_id=$(openstack server list | grep " $VM_NAME " | awk '{print $2}')
# The alarm posts to the consumer's /failure URL when an instance update event
# reports state "error" for this VM.
local -a alarm_opts=(
    --name "$ALARM_NAME"
    --alarm-action "http://localhost:$CONSUMER_PORT/failure"
    --description "VM failure"
    --repeat-actions False
    --severity "moderate"
    --event-type compute.instance.update
    -q "traits.state=string::error; traits.instance_id=string::$vm_id"
)
ceilometer alarm-event-create "${alarm_opts[@]}"
# Launch the monitor in the background unless one is already running.
if pgrep -f "python monitor.py" ; then
    return 0
fi
# Arguments: watched host name, its address, and the inspector's event URL.
# All output goes to monitor.log.
sudo python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" \
    "http://127.0.0.1:$INSPECTOR_PORT/events" \
    > monitor.log 2>&1 &
# Stop the monitor if it is running.
# The PIDs are looked up once: a second pgrep call for kill could come back
# empty if the process exited in between, and kill without a PID is a usage
# error.
local monitor_pids
monitor_pids=$(pgrep -f "python monitor.py") || return 0
# Unquoted on purpose: one PID per word. sudo matches how the monitor is
# started.
sudo kill $monitor_pids
# Launch the inspector in the background unless one is already running.
if pgrep -f "python inspector.py" ; then
    return 0
fi
# The inspector receives its port as the only argument; output goes to
# inspector.log.
python inspector.py "$INSPECTOR_PORT" \
    > inspector.log 2>&1 &
# Stop the inspector if it is running.
# The PIDs are looked up once: a second pgrep call for kill could come back
# empty if the process exited in between, and kill without a PID is a usage
# error.
local inspector_pids
inspector_pids=$(pgrep -f "python inspector.py") || return 0
# Unquoted on purpose: one PID per word.
kill $inspector_pids
# Launch the consumer in the background unless one is already running.
if pgrep -f "python consumer.py" ; then
    return 0
fi
# The consumer receives its port as the only argument; output goes to
# consumer.log.
python consumer.py "$CONSUMER_PORT" \
    > consumer.log 2>&1 &
# Stop the consumer if it is running.
# The PIDs are looked up once: a second pgrep call for kill could come back
# empty if the process exited in between, and kill without a PID is a usage
# error.
local consumer_pids
consumer_pids=$(pgrep -f "python consumer.py") || return 0
# Unquoted on purpose: one PID per word.
kill $consumer_pids
#######################################
# Poll the server list until the test VM reports ACTIVE.
# Globals:
#   VM_NAME (read)
#   VM_LAUNCH_POLL_INTERVAL (read, optional; pause between polls, default 1)
# Returns: 0 as soon as the VM is ACTIVE.
# Exits with status 1 when the VM reports ERROR or after 60 polls.
#######################################
wait_for_vm_launch() {
    echo "waiting for vm launch..."

    # get VM state as test user
    change_to_doctor_user

    local count=0
    # NOTE(review): the pause between polls is not visible in the source; one
    # second per poll is assumed here and can be overridden. Confirm.
    local interval=${VM_LAUNCH_POLL_INTERVAL:-1}
    while [[ ${count} -lt 60 ]]
    do
        # Column 6 of the server-list row that carries the VM name.
        state=$(openstack server list | grep " $VM_NAME " | awk '{print $6}')
        [[ "$state" == "ACTIVE" ]] && return 0
        if [[ "$state" == "ERROR" ]] ; then
            echo "vm state is ERROR"
            exit 1
        fi
        count=$((count + 1))
        sleep "$interval"
    done
    echo "ERROR: time out while waiting for vm launch"
    exit 1
}
# Take the compute host's default-route interface down for three minutes by
# running a small script on that host.
echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
# Quoted delimiter: the text is written literally, so $dev and $(...) are
# evaluated on the compute host when the script runs, not here.
cat > disable_network.sh << 'END_TXT'
#!/bin/bash -x
dev=$(sudo ip route | awk '/^default/{print $5}')
sudo ip link set "$dev" down
# Keep the link down for the 3 minutes announced by the caller.
sleep 180
sudo ip link set "$dev" up
END_TXT
chmod +x disable_network.sh
# $ssh_opts_cpu stays unquoted so that it splits into separate options.
scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
# nohup plus '&' lets the remote script keep running after this ssh command
# returns.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
#######################################
# Report the delay between failure detection and consumer notification.
# Reads field 5 of the "doctor monitor detected at" line in monitor.log and of
# the "doctor consumer notified at" line in consumer.log (current directory).
# Globals: detected, notified (written)
# Outputs: "<delay> OK" when 0 < delay < 1, otherwise "<delay> NG".
#######################################
calculate_notification_time() {
    detected=$(awk '/doctor monitor detected at/{print $5}' monitor.log)
    notified=$(awk '/doctor consumer notified at/{print $5}' consumer.log)
    # delay = notified - detected; awk does the floating-point arithmetic.
    echo "$notified $detected" | \
        awk '{d = $1 - $2; if (d < 1 && d > 0) print d " OK"; else print d " NG"}'
}
#######################################
# Check the host_status reported for the test VM against an expected value.
# Globals:
#   VM_NAME (read); host_status_line, host_status (written)
# Arguments:
#   $1 - expected host status (callers pass "UP" or "DOWN")
# Outputs: a confirmation line on match, an "ERROR: ..." line otherwise.
# Exits with status 1 when the field is missing, empty or different.
#######################################
check_host_status() {
    local expect_state=$1

    # The query is made as the test user, not as admin.
    change_to_doctor_user

    # The assignment carries grep's exit status, so a missing host_status row
    # is caught here.
    if ! host_status_line=$(openstack --os-compute-api-version 2.16 server show "$VM_NAME" | grep "host_status") ; then
        echo "ERROR: host_status not configured for owner in Nova policy.json"
        exit 1
    fi

    # Field 4 of the table row is the value column.
    host_status=$(awk '{print $4}' <<< "$host_status_line")
    if [ -z "$host_status" ] ; then
        echo "ERROR: host_status not reported by: nova show $VM_NAME"
        exit 1
    elif [[ "$host_status" != "$expect_state" ]] ; then
        echo "ERROR: host_status:$host_status not equal to expect_state: $expect_state"
        exit 1
    else
        echo "$VM_NAME showing host_status: $host_status"
    fi
}
# Tear-down sequence: undo the host flag, then remove the VM, the alarm, the
# image and the test identity, and finally show the remote script's log.
# Presumably clears a forced-down flag on the compute host; confirm against
# nova_force_down.py.
python ./nova_force_down.py "$COMPUTE_HOST" --unset

# VM and alarm are looked up as the test user.
change_to_doctor_user
if openstack server list | grep -q " $VM_NAME " ; then
    openstack server delete "$VM_NAME"
fi
alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
if [ -n "$alarm_id" ] ; then
    ceilometer alarm-delete "$alarm_id"
fi

image_id=$(openstack image list | grep " $IMAGE_NAME " | awk '{print $2}')
if [ -n "$image_id" ] ; then
    openstack image delete "$image_id"
fi
openstack role remove "$DOCTOR_ROLE" \
    --user "$DOCTOR_USER" \
    --project "$DOCTOR_PROJECT"
openstack project delete "$DOCTOR_PROJECT"
openstack user delete "$DOCTOR_USER"

echo "waiting disabled compute host back to be enabled..."
check_host_status "UP"
# Print the log left by the network-disabling script, if there is one.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
    "[ -e disable_network.log ] && cat disable_network.log"
# Top-level sequence: progress messages and the steps visible between them.
# NOTE(review): several messages have no step after them in the lines visible
# here; confirm the corresponding calls exist in the full script.
printf '%s\n' \
    "Note: doctor/tests/run.sh has been executed." \
    "preparing VM image..." \
    "creating test user..." \
    "creating VM and alarm..."

printf '%s\n' "get computer host info and prepare to ssh..."
get_compute_host_info

printf '%s\n' \
    "starting doctor sample components..." \
    "injecting host failure..."

# After the failure the VM's host must be reported as DOWN; then print the
# detection-to-notification delay.
check_host_status "DOWN"
calculate_notification_time