2 ##############################################################################
3 # Copyright (c) 2016 NEC Corporation and others.
5 # All rights reserved. This program and the accompanying materials
6 # are made available under the terms of the Apache License, Version 2.0
7 # which accompanies this distribution, and is available at
8 # http://www.apache.org/licenses/LICENSE-2.0
9 ##############################################################################
# Global settings for the doctor test run, followed by a sanity check of the
# selected installer type.

# Command tracing is on by default; it is skipped only when CI_DEBUG is set to
# something other than "true".
11 [[ "${CI_DEBUG:-true}" == "true" ]] && set -x
# Download location of the CirrOS 0.3.0 x86_64 disk image.
13 IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
# Local file name of the image, derived from IMAGE_NAME.
15 IMAGE_FILE="${IMAGE_NAME}.img"
# Name of the alarm that this script looks up, creates and deletes.
19 ALARM_NAME=doctor_alarm1
25 #TODO: change back to `_member_` when JIRA DOCTOR-55 is done
# Installer types accepted by the check below; INSTALLER_TYPE defaults to "apex".
28 SUPPORTED_INSTALLER_TYPES="apex local"
29 INSTALLER_TYPE=${INSTALLER_TYPE:-apex}
# "none" acts as a "not supplied" marker: get_compute_host_info discovers the
# installer address itself when INSTALLER_IP still equals "none" (apex only).
30 INSTALLER_IP=${INSTALLER_IP:-none}
# NOTE(review): this leaves COMPUTE_USER non-empty ("none") when the caller did
# not set it, so the ${COMPUTE_USER:-...} fallbacks in get_compute_host_info
# cannot take effect unless something in between clears it -- confirm.
31 COMPUTE_USER=${COMPUTE_USER:-none}
# Base ssh/scp options: do not record host keys and do not verify them.
32 ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
# Reject installer types that are not listed in SUPPORTED_INSTALLER_TYPES.
# NOTE(review): the right-hand side of =~ is quoted, so this is a literal
# substring test; an empty value or a fragment such as "loc" also passes.
34 if [[ ! "$SUPPORTED_INSTALLER_TYPES" =~ "$INSTALLER_TYPE" ]] ; then
35 echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
# Work out where the test VM runs and how to reach that machine.
# Globals read:    VM_NAME, INSTALLER_TYPE, INSTALLER_IP, ssh_opts
# Globals written: COMPUTE_HOST (exported), COMPUTE_USER, COMPUTE_IP,
#                  and for apex also instack_mac and possibly INSTALLER_IP
39 get_compute_host_info() {
43 # get computer host info which VM boot in
# The host name comes from the "OS-EXT-SRV-ATTR:host" row of
# `openstack server show`; the last awk keeps only the text before the first
# '.', i.e. the short host name.
44 export COMPUTE_HOST=$(openstack server show $VM_NAME | \
45 grep "OS-EXT-SRV-ATTR:host" | awk '{ print $4 }' |
46 awk -F '.' '{print $1}')
# apex: the compute address is obtained by querying the installer node.
48 if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
# NOTE(review): with the top-level default COMPUTE_USER=${COMPUTE_USER:-none}
# this fallback looks unreachable -- confirm.
49 COMPUTE_USER=${COMPUTE_USER:-heat-admin}
# No installer address supplied: take the fifth column of the "default" row of
# `virsh domiflist instack` (presumably the MAC address) and look that value
# up in the ARP table to get the installer IP.
50 if [[ "$INSTALLER_IP" == "none" ]] ; then
51 instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
52 INSTALLER_IP=$(/usr/sbin/arp -e | grep ${instack_mac} | awk '{print $1}')
# Run `openstack server show` for the compute host on the installer node over
# ssh and take the fifth field of its " ctlplane network " row. The \$5 is
# escaped so that it is expanded by the remote awk, not by the local shell.
54 COMPUTE_IP=$(sudo ssh $ssh_opts $INSTALLER_IP \
56 openstack server show $COMPUTE_HOST \
57 | awk '/ ctlplane network /{print \$5}'")
# local: use the invoking user and resolve the host name through getent.
58 elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
59 COMPUTE_USER=${COMPUTE_USER:-$(whoami)}
60 COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
61 if [[ -z "$COMPUTE_IP" ]]; then
62 echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
67 # verify connectivity to target compute host
# NOTE(review): if the script runs with errexit enabled, a failing ping ends
# the script before the $? test below is reached -- confirm the shell options.
68 ping -c 1 "$COMPUTE_IP"
69 if [[ $? -ne 0 ]] ; then
70 echo "ERROR: can not ping to computer host"
# Prepare the ssh options used to reach the compute host and check that a
# login works.
# Globals read:    ssh_opts, INSTALLER_TYPE, INSTALLER_IP, COMPUTE_HOST,
#                  COMPUTE_USER, COMPUTE_IP
# Globals written: ssh_opts_cpu
# Side effects:    for apex, creates the file instack_key in the current
#                  directory
75 prepare_compute_ssh() {
# Start from the common options; an identity file is appended for apex.
76 ssh_opts_cpu="$ssh_opts"
78 # get ssh key from installer node
79 if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
# Copy the stack user's private key from the installer node as root, then hand
# ownership of the local copy to the invoking user.
80 sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
81 sudo chown $(whoami):$(whoami) instack_key
83 ssh_opts_cpu+=" -i instack_key"
# local: nothing is fetched; working key-based access is taken for granted.
84 elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
85 echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
88 # verify ssh to target compute host
# ssh_opts_cpu is expanded unquoted on purpose: it holds several options that
# must be split into separate words.
# NOTE(review): if the script runs with errexit enabled, a failing ssh ends
# the script before the $? test below is reached -- confirm the shell options.
89 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit'
90 if [[ $? -ne 0 ]] ; then
91 echo "ERROR: can not ssh to computer host"
# Fetch the test image unless a local copy is already present.
# wget's lowercase -o names the file that receives wget's log messages, while
# uppercase -O names the file the downloaded document is written to; the image
# has to be stored in IMAGE_FILE, so -O is required here.
[ -e "$IMAGE_FILE" ] && return 0
if ! wget "$IMAGE_URL" -O "$IMAGE_FILE" ; then
    # -O creates the target even when the transfer fails; remove the leftover
    # so that the existence check above does not skip the next attempt.
    rm -f -- "$IMAGE_FILE"
    return 1
fi
# Register the image in OpenStack unless `openstack image list` already shows
# an entry for IMAGE_NAME (the pattern requires a space on each side of the
# name, so longer names containing it do not match). The image is created with
# disk format IMAGE_FORMAT and container format "bare".
102 openstack image list | grep -q " $IMAGE_NAME " && return 0
103 openstack image create "$IMAGE_NAME" \
105 --disk-format "$IMAGE_FORMAT" \
106 --container-format bare \
# Create the test user, project and role assignment, each only when the
# corresponding listing does not already contain it.
# NOTE(review): the password is passed as a command-line argument and command
# tracing (set -x) is on by default in this script, so it can end up in the
# trace output -- confirm this is acceptable for the test credentials.
111 openstack user list | grep -q " $DOCTOR_USER " || {
112 openstack user create "$DOCTOR_USER" --password "$DOCTOR_PW"
# Same pattern for the project.
114 openstack project list | grep -q " $DOCTOR_PROJECT " || {
115 openstack project create "$DOCTOR_PROJECT"
# Grant DOCTOR_ROLE to the user on the project unless it is already listed.
117 openstack user role list "$DOCTOR_USER" --project "$DOCTOR_PROJECT" \
118 | grep -q " $DOCTOR_ROLE " || {
119 openstack role add "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
120 --project "$DOCTOR_PROJECT"
# Switch the OpenStack client credentials of this shell to the test account.
# Globals read:    DOCTOR_USER, DOCTOR_PW, DOCTOR_PROJECT
# Globals written: OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_TENANT_NAME
#                  (all exported, so child processes see them as well)
change_to_doctor_user() {
    OS_USERNAME="$DOCTOR_USER"
    OS_PASSWORD="$DOCTOR_PW"
    # The project name is published under both variable names.
    OS_PROJECT_NAME="$DOCTOR_PROJECT"
    OS_TENANT_NAME="$DOCTOR_PROJECT"
    export OS_USERNAME OS_PASSWORD OS_PROJECT_NAME OS_TENANT_NAME
}
# Boot the test VM with the test user's credentials unless a server named
# VM_NAME is already listed.
133 # test VM done with test user, so can test non-admin
134 change_to_doctor_user
# Already present: nothing to do.
135 openstack server list | grep -q " $VM_NAME " && return 0
136 openstack server create --flavor "$VM_FLAVOR" \
137 --image "$IMAGE_NAME" \
# Create the event alarm for the test VM unless an alarm named ALARM_NAME is
# already listed.
146 # get vm_id as test user
147 change_to_doctor_user
148 ceilometer alarm-list | grep -q " $ALARM_NAME " && return 0
# Second column of the server-list row for VM_NAME, used as the instance id in
# the alarm query below.
149 vm_id=$(openstack server list | grep " $VM_NAME " | awk '{print $2}')
# The alarm matches compute.instance.update events whose state trait is
# "error" for this instance and posts to the /failure endpoint on
# CONSUMER_PORT of localhost.
150 ceilometer alarm-event-create --name "$ALARM_NAME" \
151 --alarm-action "http://localhost:$CONSUMER_PORT/failure" \
152 --description "VM failure" \
154 --repeat-actions False \
155 --severity "moderate" \
156 --event-type compute.instance.update \
157 -q "traits.state=string::error; traits.instance_id=string::$vm_id"
# Start of the monitor: skipped when a process whose command line matches
# "python monitor.py" already exists. Otherwise monitor.py is launched under
# sudo in the background with the compute host name, its IP and the
# inspector's /events URL; stdout and stderr go to monitor.log.
163 pgrep -f "python monitor.py" && return 0
164 sudo python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" \
165 "http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
# Stop of the monitor: nothing to do when no matching process exists;
# otherwise every matching PID is signalled (via sudo, as it was started
# with sudo).
169 pgrep -f "python monitor.py" || return 0
170 sudo kill $(pgrep -f "python monitor.py")
# Start of the inspector: skipped when a process matching
# "python inspector.py" already exists. Otherwise inspector.py is launched in
# the background on INSPECTOR_PORT; stdout and stderr go to inspector.log.
175 pgrep -f "python inspector.py" && return 0
176 python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 &
# Stop of the inspector: nothing to do when no matching process exists;
# otherwise every matching PID is signalled.
180 pgrep -f "python inspector.py" || return 0
181 kill $(pgrep -f "python inspector.py")
# Start of the consumer: skipped when a process matching "python consumer.py"
# already exists. Otherwise consumer.py is launched in the background on
# CONSUMER_PORT; stdout and stderr go to consumer.log.
186 pgrep -f "python consumer.py" && return 0
187 python consumer.py "$CONSUMER_PORT" > consumer.log 2>&1 &
# Stop of the consumer: nothing to do when no matching process exists;
# otherwise every matching PID is signalled.
191 pgrep -f "python consumer.py" || return 0
192 kill $(pgrep -f "python consumer.py")
# Wait until the test VM reports ACTIVE.
# Globals read: VM_NAME, count (initialisation and update are not visible in
#               this view)
# Returns: 0 as soon as the VM is ACTIVE.
# Exits:   with status 1 when the VM reports ERROR.
196 wait_for_vm_launch() {
197 echo "waiting for vm launch..."
200 # get VM state as test user
201 change_to_doctor_user
# The loop continues while count is below 60.
204 while [[ ${count} -lt 60 ]]
206 state=$(openstack server list | grep " $VM_NAME " | awk '{print $6}')
207 [[ "$state" == "ACTIVE" ]] && return 0
208 [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
# Reached only when the loop ended without seeing ACTIVE or ERROR.
212 echo "ERROR: time out while waiting for vm launch"
# Inject a host failure by interrupting the compute host's network.
# A helper script, disable_network.sh, is written through a here-document whose
# delimiter is quoted, so its text is stored literally without local expansion.
# The helper looks up the device of the default route and sets that link down
# and later up again (the message below announces 3 minutes; the waiting
# between the two steps is not visible in this view). The helper is made
# executable, copied to the compute host and started there under nohup in the
# background, with its output collected in disable_network.log on that host.
218 echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
219 cat > disable_network.sh << 'END_TXT'
221 dev=$(sudo ip route | awk '/^default/{print $5}')
223 sudo ip link set $dev down
225 sudo ip link set $dev up
228 chmod +x disable_network.sh
229 scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
230 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
# Report how long the failure notification took to reach the consumer.
#
# The timestamp is the 5th whitespace-separated field of the
# "doctor monitor detected at" line in monitor.log and of the
# "doctor consumer notified at" line in consumer.log (both in the current
# directory). When a log contains several such lines, the most recent one is
# used, so the final awk always receives a single "<notified> <detected>"
# record instead of a multi-line value.
# Outputs: "<delta> OK" when 0 < delta < 1, otherwise "<delta> NG" (stdout),
#          where delta = notified - detected.
# Returns: the exit status of the final awk; printing "NG" does not by itself
#          produce a non-zero status.
calculate_notification_time() {
    # Keep the helper variables out of the global namespace.
    local detected notified
    detected=$(grep "doctor monitor detected at" monitor.log | tail -n 1 | awk '{print $5}')
    notified=$(grep "doctor consumer notified at" consumer.log | tail -n 1 | awk '{print $5}')
    echo "$notified $detected" | \
        awk '{d = $1 - $2; if (d < 1 && d > 0) print d " OK"; else print d " NG"}'
}
# Compare the host_status that Nova reports for the test VM with an expected
# value.
# Globals read: VM_NAME, expect_state (its assignment is not visible in this
#               view; callers pass "UP" or "DOWN" as the first argument, so it
#               is presumably taken from $1 -- confirm)
240 check_host_status() {
243 change_to_doctor_user
# The request is made with compute API version 2.16 and only the row
# containing "host_status" is kept; $? below is the status of that grep.
# NOTE(review): if the script runs with errexit enabled, a grep miss ends the
# script at this assignment, before the $? test -- confirm the shell options.
245 host_status_line=$(openstack --os-compute-api-version 2.16 server show $VM_NAME | grep "host_status")
246 if [[ $? -ne 0 ]] ; then
247 echo "ERROR: host_status not configured for owner in Nova policy.json"
# Fourth whitespace-separated field of the row is the status value.
251 host_status=$(echo $host_status_line | awk '{print $4}')
252 if [ -z "$host_status" ] ; then
253 echo "ERROR: host_status not reported by: nova show $VM_NAME"
255 elif [[ "$host_status" != "$expect_state" ]] ; then
256 echo "ERROR: host_status:$host_status not equal to expect_state: $expect_state"
259 echo "$VM_NAME showing host_status: $host_status"
# Teardown: re-enable the compute host, then remove the resources created for
# the test.
271 echo "waiting disabled compute host back to be enabled..."
# Clear the forced-down flag for the compute host and expect Nova to report the
# host as UP again.
272 python ./nova_force_down.py "$COMPUTE_HOST" --unset
274 check_host_status "UP"
# Print the log of the network-disabling helper from the compute host when it
# exists there.
275 ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
276 "[ -e disable_network.log ] && cat disable_network.log"
# Delete the VM as the test user, but only when it is listed.
280 change_to_doctor_user
281 openstack server list | grep -q " $VM_NAME " && openstack server delete "$VM_NAME"
# Delete the alarm when an id was found for ALARM_NAME.
283 alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
285 [ -n "$alarm_id" ] && ceilometer alarm-delete "$alarm_id"
# Delete the image when an id was found for IMAGE_NAME.
288 image_id=$(openstack image list | grep " $IMAGE_NAME " | awk '{print $2}')
290 [ -n "$image_id" ] && openstack image delete "$image_id"
# Remove the role assignment first, then the project and the user.
291 openstack role remove "$DOCTOR_ROLE" --user "$DOCTOR_USER" \
292 --project "$DOCTOR_PROJECT"
293 openstack project delete "$DOCTOR_PROJECT"
294 openstack user delete "$DOCTOR_USER"
# Main sequence of the test run. Each message announces a phase; only some of
# the calls belonging to the phases are visible in this view.
298 echo "Note: doctor/tests/run.sh has been executed."
302 echo "preparing VM image..."
306 echo "creating test user..."
309 echo "creating VM and alarm..."
314 echo "get computer host info and prepare to ssh..."
315 get_compute_host_info
318 echo "starting doctor sample components..."
324 echo "injecting host failure..."
# After the failure has been injected, Nova must report the host as DOWN;
# then the delay between detection and notification is printed.
328 check_host_status "DOWN"
329 calculate_notification_time