##############################################################################
# Copyright (c) 2016 NEC Corporation and others.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
# Test image: where it is downloaded from and the local file it is saved as.
IMAGE_URL=https://launchpad.net/cirros/trunk/0.3.0/+download/cirros-0.3.0-x86_64-disk.img
# IMAGE_FILE is derived from IMAGE_NAME when this line runs, so make sure the
# name is never empty at this point; a value that is already set is kept.
IMAGE_NAME=${IMAGE_NAME:-cirros}
IMAGE_FILE="${IMAGE_NAME}.img"
# Name of the alarm created, looked up and deleted by this script.
ALARM_NAME=doctor_alarm1

# Deployment settings. Every ${VAR:-default} value can be overridden from the
# environment; "none" means "discover it at run time".
SUPPORTED_INSTALLER_TYPES="apex local"
INSTALLER_TYPE=${INSTALLER_TYPE:-apex}
INSTALLER_IP=${INSTALLER_IP:-none}
COMPUTE_HOST=${COMPUTE_HOST:-overcloud-novacompute-0}
COMPUTE_IP=${COMPUTE_IP:-none}
COMPUTE_USER=${COMPUTE_USER:-heat-admin}
# Shared ssh/scp options: do not record or verify host keys.
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

# Succeed only when $1 is exactly one of the space-separated words in
# SUPPORTED_INSTALLER_TYPES. A whole-word comparison is used because a
# substring/regex match would also accept values such as "a", "pex" or "".
is_supported_installer_type() {
    local candidate=$1
    local known
    # Intentionally unquoted: the list is split on whitespace into words.
    for known in $SUPPORTED_INSTALLER_TYPES; do
        [[ "$candidate" == "$known" ]] && return 0
    done
    return 1
}

if ! is_supported_installer_type "$INSTALLER_TYPE"; then
    echo "ERROR: INSTALLER_TYPE=$INSTALLER_TYPE is not supported."
    # Nothing below can work with an unknown installer type.
    exit 1
fi
# Work out how to reach the compute host over ssh and check that it answers.
# Globals read:    INSTALLER_TYPE, COMPUTE_HOST, ssh_opts
# Globals written: ssh_opts_cpu; INSTALLER_IP and COMPUTE_IP (only while they
#                  still hold the placeholder "none")
# Files written:   ./instack_key (apex only)
# Exits 1 when an address cannot be determined or the host does not answer
# a single ping.
prepare_compute_ssh() {
    local instack_mac

    ssh_opts_cpu="$ssh_opts"

    if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
        if [[ "$INSTALLER_IP" == "none" ]] ; then
            # Take the MAC from the instack domain's interface row that
            # mentions "default", then find the matching local ARP entry.
            instack_mac=$(sudo virsh domiflist instack | awk '/default/{print $5}')
            if [[ -z "$instack_mac" ]] ; then
                # An empty pattern would match every ARP entry.
                echo "ERROR: could not determine the MAC address of the instack VM"
                exit 1
            fi
            INSTALLER_IP=$(/usr/sbin/arp -e | grep -F -- "$instack_mac" | awk '{print $1}')
        fi

        if [[ "$COMPUTE_IP" == "none" ]] ; then
            # \$5 is escaped so that the remote awk, not the local shell,
            # expands it.
            # NOTE(review): confirm whether the remote side must load
            # credentials (e.g. source an rc file) before calling nova.
            COMPUTE_IP=$(sudo ssh $ssh_opts "$INSTALLER_IP" \
                         "nova show $COMPUTE_HOST \
                         | awk '/ ctlplane network /{print \$5}'")
        fi

        # get ssh key from installer node
        sudo scp $ssh_opts root@"$INSTALLER_IP":/home/stack/.ssh/id_rsa instack_key
        sudo chown "$(whoami)":"$(whoami)" instack_key
        # ssh refuses to use a private key that other users can read.
        chmod 400 instack_key
        ssh_opts_cpu+=" -i instack_key"
    elif [[ "$INSTALLER_TYPE" == "local" ]] ; then
        if [[ "$COMPUTE_IP" == "none" ]] ; then
            COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
            if [[ -z "$COMPUTE_IP" ]]; then
                echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
                exit 1
            fi
        fi

        echo "INSTALLER_TYPE set to 'local'. Assuming SSH keys already exchanged with $COMPUTE_HOST"
    fi

    # verify connectivity to target compute host
    if ! ping -c 1 "$COMPUTE_IP" ; then
        echo "ERROR: can not ping compute host $COMPUTE_HOST ($COMPUTE_IP)"
        exit 1
    fi
}
# Reuse an image file that is already present.
[ -e "$IMAGE_FILE" ] && return 0
# -O names the file the downloaded document is written to; lowercase -o
# would write wget's log messages there instead of the image.
if ! wget "$IMAGE_URL" -O "$IMAGE_FILE" ; then
    # Do not leave a partial file behind: the existence check above would
    # treat it as a finished download on the next run.
    rm -f -- "$IMAGE_FILE"
    return 1
fi
# Nothing to do when an image with this exact name is already listed.
glance image-list | grep -q " $IMAGE_NAME " && return 0
# Upload the local image file under IMAGE_NAME.
# NOTE(review): confirm whether a visibility option is also needed so that
# the non-admin test user can boot from this image.
glance image-create --name "$IMAGE_NAME" \
                    --disk-format "$IMAGE_FORMAT" \
                    --container-format bare \
                    --file "$IMAGE_FILE"
# Create the test user, tenant and role assignment, each only if missing.
# The patterns are space-delimited so that a name which merely contains the
# wanted one (e.g. "demo2" for "demo") does not count as already present.
keystone user-list | grep -q " $TEST_USER " || {
    keystone user-create --name "$TEST_USER" --pass "$TEST_PW"
}
keystone tenant-list | grep -q " $TEST_TENANT " || {
    keystone tenant-create --name "$TEST_TENANT"
}
keystone user-role-list --user "$TEST_USER" --tenant "$TEST_TENANT" \
| grep -q " $TEST_ROLE " || {
    keystone user-role-add --user "$TEST_USER" --role "$TEST_ROLE" \
                           --tenant "$TEST_TENANT"
}
# Nothing to do when a server with this exact name is already listed.
nova list | grep -q " $VM_NAME " && return 0

# Subshell: the credential switch applies to this boot request only and does
# not leak into later steps that run with the caller's credentials.
(
    # test VM done with test user, so can test non-admin
    export OS_USERNAME="$TEST_USER"
    export OS_PASSWORD="$TEST_PW"
    export OS_TENANT_NAME="$TEST_TENANT"
    # The server is named VM_NAME, which is what the check above looks for.
    nova boot --flavor "$VM_FLAVOR" \
              --image "$IMAGE_NAME" \
              "$VM_NAME"
)
# Nothing to do when an alarm with this exact name is already listed.
ceilometer alarm-list | grep -q " $ALARM_NAME " && return 0
# Second column of the matching `nova list` row is the server id.
vm_id=$(nova list | grep " $VM_NAME " | awk '{print $2}')
if [[ -z "$vm_id" ]] ; then
    # Without an id the query below would end in an empty instance filter.
    echo "ERROR: VM $VM_NAME not found, can not create alarm $ALARM_NAME"
    return 1
fi
# Event alarm: calls the consumer's /failure URL when an instance update
# event reports state "error" for this server.
ceilometer alarm-event-create --name "$ALARM_NAME" \
    --alarm-action "http://localhost:$CONSUMER_PORT/failure" \
    --description "VM failure" \
    --repeat-actions False \
    --severity "moderate" \
    --event-type compute.instance.update \
    -q "traits.state=string::error; traits.instance_id=string::$vm_id"
# Do not start a second monitor when one is already running.
pgrep -f "python monitor.py" && return 0
# Run in the background; the monitor reports to the inspector's /events URL
# and its output is captured in monitor.log.
sudo python monitor.py "$COMPUTE_HOST" "$COMPUTE_IP" \
    "http://127.0.0.1:$INSPECTOR_PORT/events" > monitor.log 2>&1 &
# Look the PIDs up once: a second lookup could come back empty if the
# process exits in between, leaving kill without arguments.
monitor_pids=$(pgrep -f "python monitor.py") || return 0
# Intentionally unquoted: there may be several PIDs, one per word.
sudo kill $monitor_pids
# Do not start a second inspector when one is already running.
pgrep -f "python inspector.py" && return 0
# Run in the background on INSPECTOR_PORT; output goes to inspector.log.
python inspector.py "$INSPECTOR_PORT" > inspector.log 2>&1 &
# Look the PIDs up once: a second lookup could come back empty if the
# process exits in between, leaving kill without arguments.
inspector_pids=$(pgrep -f "python inspector.py") || return 0
# Intentionally unquoted: there may be several PIDs, one per word.
kill $inspector_pids
# Do not start a second consumer when one is already running.
pgrep -f "python consumer.py" && return 0
# Run in the background on CONSUMER_PORT; output goes to consumer.log.
python consumer.py "$CONSUMER_PORT" > consumer.log 2>&1 &
# Look the PIDs up once: a second lookup could come back empty if the
# process exits in between, leaving kill without arguments.
consumer_pids=$(pgrep -f "python consumer.py") || return 0
# Intentionally unquoted: there may be several PIDs, one per word.
kill $consumer_pids
# Poll `nova list` until the test VM reports ACTIVE.
# Globals:   VM_NAME (read)
# Arguments: $1 - maximum number of polls (default 60)
#            $2 - seconds to sleep between polls (default 1)
# Returns:   0 as soon as the VM is ACTIVE; 1 when it reports ERROR or the
#            polls are used up.
wait_for_vm_launch() {
    local max_polls=${1:-60}
    local interval=${2:-1}
    local attempt state

    echo "waiting for vm launch..."
    for (( attempt = 0; attempt < max_polls; attempt++ )); do
        # Sixth whitespace-separated field of the matching row is the status.
        state=$(nova list | grep " $VM_NAME " | awk '{print $6}')
        [[ "$state" == "ACTIVE" ]] && return 0
        if [[ "$state" == "ERROR" ]]; then
            # No point in waiting any longer for a failed boot.
            echo "ERROR: $VM_NAME entered ERROR state"
            return 1
        fi
        sleep "$interval"
    done
    echo "ERROR: time out while waiting for vm launch"
    return 1
}
echo "disabling network of compute host [$COMPUTE_HOST] for 3 mins..."
# Quoted delimiter: the text is written out literally, so $dev and $(...)
# are evaluated on the compute host when the script runs, not here.
cat > disable_network.sh << 'END_TXT'
#!/bin/bash -x
# Interface that carries the default route.
dev=$(ip route | awk '/^default/{print $5}')
# Give the ssh session that launched this script time to disconnect.
sleep 1
sudo ip link set "$dev" down
# Keep the link down for the announced 3 minutes.
sleep 180
sudo ip link set "$dev" up
END_TXT
chmod +x disable_network.sh
# ssh_opts_cpu is intentionally unquoted: it holds several options.
scp $ssh_opts_cpu disable_network.sh "$COMPUTE_USER@$COMPUTE_IP:"
# nohup + & lets the script outlive the ssh session that starts it.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'nohup ./disable_network.sh > disable_network.log 2>&1 &'
# Print the delay between failure detection and consumer notification.
# Reads:    monitor.log  - first line containing "doctor monitor detected at"
#           consumer.log - first line containing "doctor consumer notified at"
#           (the fifth field of each line is taken as the timestamp)
# Outputs:  "<delay> OK" when 0 < delay < 1, otherwise "<delay> NG"
# Returns:  1 when either timestamp is missing, otherwise awk's status.
calculate_notification_time() {
    local detected notified

    # `exit` after the first match keeps each value to a single timestamp
    # even when a message was logged more than once.
    detected=$(awk '/doctor monitor detected at/{print $5; exit}' monitor.log)
    notified=$(awk '/doctor consumer notified at/{print $5; exit}' consumer.log)
    if [[ -z "$detected" || -z "$notified" ]]; then
        # Subtracting empty values would print a misleading "0 NG".
        echo "ERROR: detection or notification timestamp not found in logs"
        return 1
    fi
    echo "$notified $detected" | \
        awk '{d = $1 - $2; if (d < 1 && d > 0) print d " OK"; else print d " NG"}'
}
# Check, as the test user, that `nova show` reports host_status DOWN for
# the test VM.
# Globals:  VM_NAME, TEST_USER, TEST_PW, TEST_TENANT (read)
# Outputs:  a confirmation line on success, an ERROR line otherwise
# Returns:  0 when host_status is DOWN; 1 when the field is absent or has
#           any other value.
check_host_status_down() {
    # Subshell: the credential switch must not leak into the caller, and
    # `exit` below only sets this function's return status.
    (
        # Switching to test user
        export OS_USERNAME="$TEST_USER"
        export OS_PASSWORD="$TEST_PW"
        export OS_TENANT_NAME="$TEST_TENANT"

        host_status_line=$(nova show "$VM_NAME" | grep "host_status")
        if [[ -z "$host_status_line" ]]; then
            echo "ERROR: host_status not configured for owner in Nova policy.json"
            exit 1
        fi

        # Fourth whitespace-separated field of the table row is the value.
        host_status=$(awk '{print $4}' <<< "$host_status_line")
        if [[ "$host_status" == "DOWN" ]]; then
            echo "$VM_NAME showing host_status: $host_status"
            exit 0
        fi
        echo "ERROR: host_status not reported by: nova show $VM_NAME"
        exit 1
    )
}
# Clear the forced-down flag on the compute host and remove the test VM.
python ./nova_force_down.py "$COMPUTE_HOST" --unset
nova delete "$VM_NAME"

# Delete the alarm and the image only when a row with that name exists;
# the second column of the matching row is the id.
alarm_id=$(ceilometer alarm-list | grep " $ALARM_NAME " | awk '{print $2}')
if [ -n "$alarm_id" ]; then
    ceilometer alarm-delete "$alarm_id"
fi
image_id=$(glance image-list | grep " $IMAGE_NAME " | awk '{print $2}')
if [ -n "$image_id" ]; then
    glance image-delete "$image_id"
fi

# Remove the role assignment first, then the tenant and the user.
keystone user-role-remove --user "$TEST_USER" --role "$TEST_ROLE" \
                          --tenant "$TEST_TENANT"
# The keystone CLI sub-command for removing a tenant is tenant-delete and
# takes the tenant as a positional argument.
keystone tenant-delete "$TEST_TENANT"
keystone user-delete "$TEST_USER"

#TODO: add host status check via nova admin api
echo "waiting disabled compute host back to be enabled..."
# NOTE(review): nothing here waits for the host to come back before the
# log is fetched; confirm whether a delay is expected at this point.
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" \
    "[ -e disable_network.log ] && cat disable_network.log"

echo "Note: doctor/tests/run.sh has been executed."
# Progress messages for the test phases, followed by the two verification
# steps: host status as seen by the test user, then notification latency.
echo "preparing VM image..."
echo "starting doctor sample components..."
echo "creating test user..."
echo "creating VM and alarm..."
echo "injecting host failure..."
check_host_status_down
calculate_notification_time