--- /dev/null
+# Upstream contributions. Bitergia will crawl this file and extract the relevant information.
+# "system" is one of: Gerrit, Bugzilla, Launchpad (extend this list when further systems are supported).
+---
+-
+ url: https://blueprints.launchpad.net/ceilometer/+spec/event-alarm-evaluator
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/mark-host-down
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/python-novaclient/+spec/support-force-down-service
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/get-valid-server-state
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/servers-by-host-status
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/maintenance-reason-to-server
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/nova/+spec/service-status-notification
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/congress/+spec/push-type-datasource-driver
+ system: Launchpad
+#-
+# url: https://review.openstack.org/#/c/314915/
+# system: Gerrit
+-
+ url: https://blueprints.launchpad.net/cinder/+spec/mark-services-down
+ system: Launchpad
+-
+ url: https://blueprints.launchpad.net/python-cinderclient/+spec/mark-service-down-cli
+ system: Launchpad
+#-
+# url: https://bugs.launchpad.net/neutron/+bug/1513144
+# system: Launchpad-bug
from flask import Flask
from flask import request
import json
+import logger as doctor_log
import os
import time
+LOG = doctor_log.Logger('doctor_consumer').getLogger()
+
app = Flask(__name__)
@app.route('/failure', methods=['POST'])
def event_posted():
- app.logger.debug('doctor consumer notified at %s' % time.time())
- app.logger.debug('received data = %s' % request.data)
+ LOG.info('doctor consumer notified at %s' % time.time())
+ LOG.info('received data = %s' % request.data)
d = json.loads(request.data)
return "OK"
def main():
args = get_args()
- app.run(host="0.0.0.0", port=args.port, debug=True)
+ app.run(host="0.0.0.0", port=args.port)
if __name__ == '__main__':
--- /dev/null
+#!/bin/bash
+
# Succeeds when the variable whose NAME is passed as $1 currently holds a
# non-empty value; fails when it is unset or empty.
# NOTE: the name is expanded through eval, so only pass trusted identifiers.
# is_set env-var
function is_set {
    # Build the literal text "$NAME" first, then let eval expand it
    local ref="\$${1}"
    eval "test -n \"${ref}\""
}
+
# Prints a "[Call Trace]" header followed by one "filename:lineno:function"
# line per stack frame, outermost frame first, stopping once the frame index
# drops below the requested level.
# backtrace level
function backtrace {
    local level=$1
    # Index of the outermost entry in BASH_SOURCE
    local frame=$(( ${#BASH_SOURCE[@]} - 1 ))
    local caller

    printf '%s\n' "[Call Trace]"
    while [ $level -le $frame ]; do
        # Line number and function name are read from the entry one below
        # the source file being reported
        caller=$(( frame - 1 ))
        printf '%s:%s:%s\n' \
            "${BASH_SOURCE[frame]}" "${BASH_LINENO[caller]}" "${FUNCNAME[caller]}"
        frame=$caller
    done
}
+
# Prints "[ERROR] <file>:<line> <message>" on stderr and, when LOGDIR is set
# to a non-empty value, appends the same line to "${LOGDIR}/error.log".
# Returns the exit status that was current when err was invoked, so the
# caller's "$?" survives the call.
# xtrace is switched off while the message is produced and the previous
# setting is restored afterwards.
# err $LINENO "message"
function err {
    # Must stay the first statement: any other command would overwrite $?
    local exitcode=$?
    local xtrace
    # "set +o" lists the current options as replayable commands; keep the
    # xtrace one so it can be re-applied below
    xtrace=$(set +o | grep xtrace)
    set +o xtrace
    # BASH_SOURCE[2] is the source file two call frames above err
    local msg="[ERROR] ${BASH_SOURCE[2]}:$1 $2"
    # Quoted so the message is emitted verbatim: no word splitting (runs of
    # whitespace are kept) and no filename expansion of characters like "*"
    echo "$msg" 1>&2
    # ":-" keeps the check usable under "set -u" when LOGDIR is not defined
    if [[ -n ${LOGDIR:-} ]]; then
        echo "$msg" >> "${LOGDIR}/error.log"
    fi
    $xtrace
    return $exitcode
}
+
# Reports a fatal error and terminates the shell.
# Prints a call trace (backtrace 2) and an error line (err), waits one
# second, then exits with the status of the command that ran immediately
# before die -- or with 1 when that status was 0.
# die $LINENO "message"
function die {
    # Capture $? before any other command replaces it
    local status=$?
    set +o xtrace
    local lineno=$1
    shift
    # Never exit "successfully" from a fatal error
    [[ $status -ne 0 ]] || status=1
    backtrace 2
    err $lineno "$*"
    # Give buffers a second to flush
    sleep 1
    exit $status
}
+
# Calls die with "message" when the named variable is unset or empty, OR
# when the command that ran immediately before this function returned a
# non-zero status. Otherwise restores the caller's xtrace setting and
# returns.
# NOTE: env-var is the variable name without a '$'
# die_if_not_set $LINENO env-var "message"
function die_if_not_set {
    # Capture $? before any other command replaces it
    local status=$?
    local saved_xtrace
    saved_xtrace=$(set +o | grep xtrace)
    set +o xtrace
    local lineno=$1
    shift
    local varname=$1
    shift
    # Happy path first: variable has a value and the previous command succeeded
    if is_set $varname && [ $status == 0 ]; then
        $saved_xtrace
        return
    fi
    # Remaining arguments form the message; die does not return
    die $lineno "$*"
    $saved_xtrace
}
+
from flask import Flask
from flask import request
import json
+import logger as doctor_log
import os
import time
import nova_force_down
+LOG = doctor_log.Logger('doctor_inspector').getLogger()
+
class DoctorInspectorSample(object):
try:
host=server.__dict__.get('OS-EXT-SRV-ATTR:host')
self.servers[host].append(server)
- app.logger.debug('get hostname=%s from server=%s' % (host, server))
+ LOG.debug('get hostname=%s from server=%s' % (host, server))
except Exception as e:
- app.logger.debug('can not get hostname from server=%s' % server)
+ LOG.error('can not get hostname from server=%s' % server)
def disable_compute_host(self, hostname):
for server in self.servers[hostname]:
app = Flask(__name__)
-app.debug = True
inspector = DoctorInspectorSample()
@app.route('/events', methods=['POST'])
def event_posted():
- app.logger.debug('event posted at %s' % time.time())
- app.logger.debug('inspector = %s' % inspector)
- app.logger.debug('received data = %s' % request.data)
+ LOG.info('event posted at %s' % time.time())
+ LOG.info('inspector = %s' % inspector)
+ LOG.info('received data = %s' % request.data)
d = json.loads(request.data)
hostname = d['hostname']
event_type = d['type']
args = get_args()
app.run(port=args.port)
+
if __name__ == '__main__':
main()
--- /dev/null
+##############################################################################
+# Copyright (c) 2016 ZTE Corporation and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+# Usage:
+# import logger as doctor_log
+# LOG = doctor_log.Logger("script_name").getLogger()
+# LOG.info("message to be shown with - INFO - ")
+# LOG.debug("message to be shown with - DEBUG -")
+
+import logging
+import os
+
+
class Logger(object):
    """Configure and hand out a named ``logging`` logger.

    The logger writes to two destinations:

    * the console stream, at INFO level, or at DEBUG level when the
      ``CI_DEBUG`` environment variable equals "true" (case-insensitive);
    * the file ``<logger_name>.log`` in the current working directory,
      always at DEBUG level.

    Handlers are attached only the first time a given name is configured,
    so creating ``Logger(name)`` repeatedly does not duplicate output.
    """

    def __init__(self, logger_name):
        """Set up the logger registered under *logger_name*.

        :param logger_name: name passed to ``logging.getLogger``; also used
            as the stem of the log file name.
        """
        self.logger = logging.getLogger(logger_name)
        # Keep records away from the root logger's handlers
        self.logger.propagate = False
        self.logger.setLevel(logging.DEBUG)

        # logging.getLogger() returns the same object for the same name;
        # adding handlers again would emit every record more than once and
        # open the log file a second time.
        if self.logger.handlers:
            return

        # The field layout of this format is relied upon by the test
        # script, which extracts timestamps with awk '{print $10}'.
        formatter = logging.Formatter('%(asctime)s %(filename)s %(lineno)d '
                                      '%(levelname)-6s %(message)s')

        ci_debug = os.getenv('CI_DEBUG')
        verbose = ci_debug is not None and ci_debug.lower() == "true"

        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        console_handler.setLevel(logging.DEBUG if verbose else logging.INFO)
        self.logger.addHandler(console_handler)

        file_handler = logging.FileHandler('%s.log' % logger_name)
        file_handler.setFormatter(formatter)
        file_handler.setLevel(logging.DEBUG)
        self.logger.addHandler(file_handler)

    def getLogger(self):
        """Return the configured ``logging.Logger`` instance."""
        return self.logger
+
import argparse
from datetime import datetime
import json
+import logger as doctor_log
import os
import requests
import socket
SUPPORTED_INSPECTOR_TYPES = ['sample', 'congress']
+LOG = doctor_log.Logger('doctor_monitor').getLogger()
+
class DoctorMonitorSample(object):
interval = 0.1 # second
(congress_endpoint, doctor_ds['id']))
def start_loop(self):
- print "start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
- 'i': self.ip_addr}
+ LOG.debug("start ping to host %(h)s (ip=%(i)s)" % {'h': self.hostname,
+ 'i': self.ip_addr})
sock = socket.socket(socket.AF_INET, socket.SOCK_RAW,
socket.IPPROTO_ICMP)
sock.settimeout(self.timeout)
sock.sendto(ICMP_ECHO_MESSAGE, (self.ip_addr, 0))
data = sock.recv(4096)
except socket.timeout:
- print "doctor monitor detected at %s" % time.time()
+ LOG.info("doctor monitor detected at %s" % time.time())
self.report_error()
- print "ping timeout, quit monitoring..."
+ LOG.info("ping timeout, quit monitoring...")
return
time.sleep(self.interval)
SUPPORTED_INSPECTOR_TYPES="sample congress"
INSPECTOR_TYPE=${INSPECTOR_TYPE:-sample}
+TOP_DIR=$(cd $(dirname "$0") && pwd)
ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
as_doctor_user="--os-username $DOCTOR_USER --os-password $DOCTOR_PW
fi
if [[ "$INSTALLER_TYPE" != "local" ]] ; then
- if [[ -z "$INSTALLER_IP" ]] ; then
- echo "ERROR: no installer ip"
- exit 1
- fi
+ die_if_not_set $LINENO INSTALLER_IP "No installer IP"
fi
}
COMPUTE_HOST=$(openstack $as_doctor_user server show $VM_NAME |
grep "OS-EXT-SRV-ATTR:host" | awk '{ print $4 }')
compute_host_in_undercloud=${COMPUTE_HOST%%.*}
- if [[ -z "$COMPUTE_HOST" ]] ; then
- echo "ERROR: failed to get compute hostname"
- exit 1
- fi
+ die_if_not_set $LINENO COMPUTE_HOST "Failed to get compute hostname"
if [[ "$INSTALLER_TYPE" == "apex" ]] ; then
COMPUTE_USER=${COMPUTE_USER:-heat-admin}
COMPUTE_IP=$(getent hosts "$COMPUTE_HOST" | awk '{ print $1 }')
fi
- if [[ -z "$COMPUTE_IP" ]]; then
- echo "ERROR: Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
- exit 1
- fi
+ die_if_not_set $LINENO COMPUTE_IP "Could not resolve $COMPUTE_HOST. Either manually set COMPUTE_IP or enable DNS resolution."
echo "COMPUTE_HOST=$COMPUTE_HOST"
echo "COMPUTE_IP=$COMPUTE_IP"
# verify connectivity to target compute host
ping -c 1 "$COMPUTE_IP"
if [[ $? -ne 0 ]] ; then
- echo "ERROR: can not ping to computer host"
- exit 1
+ die $LINENO "Can not ping to computer host"
fi
# verify ssh to target compute host
ssh $ssh_opts_cpu "$COMPUTE_USER@$COMPUTE_IP" 'exit'
if [[ $? -ne 0 ]] ; then
- echo "ERROR: can not ssh to computer host"
- exit 1
+ die $LINENO "Can not ssh to computer host"
fi
}
fi
echo "CONSUMER_IP=$CONSUMER_IP"
- if [[ -z "$CONSUMER_IP" ]]; then
- echo "ERROR: Could not get CONSUMER_IP."
- exit 1
- fi
+ die_if_not_set $LINENO CONSUMER_IP "Could not get CONSUMER_IP."
}
download_image() {
nova_api_min_version="2.11"
nova_api_version=$(openstack congress datasource list | \
grep nova | grep -Po "(?<='api_version': ')[^']*")
- [[ -z $nova_api_version ]] && nova_api_version="2.0"
+ if ! is_set nova_api_version; then
+ nova_api_version="2.0"
+ fi
if [[ "$nova_api_version" < "$nova_api_min_version" ]]; then
- echo "ERROR: Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)"
- exit 1
+ die $LINENO "Congress Nova datasource API version < $nova_api_min_version ($nova_api_version)"
fi
openstack congress driver list | grep -q " doctor "
openstack congress datasource list | grep -q " doctor " || {
"fuel node | grep controller | cut -d '|' -f 5|xargs")
fi
- if [[ -z "$CONTROLLER_IPS" ]]; then
- echo "ERROR: Could not get CONTROLLER_IPS."
- exit 1
- fi
+ die_if_not_set $LINENO CONTROLLER_IPS "Could not get CONTROLLER_IPS."
for ip in $CONTROLLER_IPS
do
forward_rule="-R $CONSUMER_PORT:localhost:$CONSUMER_PORT"
sleep 5
return 0
fi
- [[ "$state" == "ERROR" ]] && echo "vm state is ERROR" && exit 1
+ if [[ "$state" == "ERROR" ]]; then
+ die $LINENO "vm state is ERROR"
+ fi
count=$(($count+1))
sleep 1
done
- echo "ERROR: time out while waiting for vm launch"
- exit 1
+ die $LINENO "Time out while waiting for VM launch"
}
inject_failure() {
}
calculate_notification_time() {
- detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $5}')
- notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $5}')
+ detected=$(grep "doctor monitor detected at" monitor.log | awk '{print $10}')
+ notified=$(grep "doctor consumer notified at" consumer.log | awk '{print $10}')
if ! grep -q "doctor consumer notified at" consumer.log ; then
- echo "ERROR: consumer hasn't received fault notification."
- exit 1
+ die $LINENO "Consumer hasn't received fault notification."
fi
echo "$notified $detected" | \
awk '{
host_status_line=$(openstack $as_doctor_user --os-compute-api-version 2.16 \
server show $VM_NAME | grep "host_status")
host_status=$(echo $host_status_line | awk '{print $4}')
- if [ -z "$host_status" ] ; then
- echo "ERROR: host_status not reported by: nova show $VM_NAME"
- exit 1
- elif [[ "$expected_state" =~ "$host_status" ]] ; then
+ die_if_not_set $LINENO host_status "host_status not reported by: nova show $VM_NAME"
+ if [[ "$expected_state" =~ "$host_status" ]] ; then
echo "$VM_NAME showing host_status: $host_status"
else
- echo "ERROR: host_status:$host_status not equal to expected_state: $expected_state"
- exit 1
+ die $LINENO "host_status:$host_status not equal to expected_state: $expected_state"
fi
}
trap cleanup EXIT
+source $TOP_DIR/functions-common
+
echo "preparing test env..."
get_installer_ip
prepare_ssh_to_cloud