from flask import Flask
from flask import request
import json
+import logger as doctor_log
import os
+import threading
import time
+from keystoneauth1.identity import v3
+from keystoneauth1 import session
import novaclient.client as novaclient
-import nova_force_down
+LOG = doctor_log.Logger('doctor_inspector').getLogger()
+
+
class ThreadedResetState(threading.Thread):
    """Thread that resets the state of a single server via a nova client.

    The client, the target state and the server are stored on the instance
    and used when the thread runs.
    """

    def __init__(self, nova, state, server):
        super(ThreadedResetState, self).__init__()
        self.nova, self.state, self.server = nova, state, server

    def run(self):
        """Call ``reset_state`` for the server, then log the completion time."""
        target = self.server
        self.nova.servers.reset_state(target, self.state)
        LOG.info('doctor mark vm(%s) error at %s' % (target, time.time()))
class DoctorInspectorSample(object):
- nova_api_version = '2.11'
+ NOVA_API_VERSION = '2.34'
+ NUMBER_OF_CLIENTS = 50
+ # TODO(tojuvone): This could be enhanced in the future with dynamic
+ # reuse of self.novaclients when all threads are in use, and with
+ # self.NUMBER_OF_CLIENTS based on the number of cores or overridden by
+ # an input argument
def __init__(self):
    """Create the pool of nova clients and load the initial server list.

    Keystone v3 credentials are read from the ``OS_*`` environment
    variables; a variable that is not set raises ``KeyError``.
    """
    self.servers = collections.defaultdict(list)

    env = os.environ
    auth = v3.Password(
        auth_url=env['OS_AUTH_URL'],
        username=env['OS_USERNAME'],
        password=env['OS_PASSWORD'],
        user_domain_name=env['OS_USER_DOMAIN_NAME'],
        project_name=env['OS_PROJECT_NAME'],
        project_domain_name=env['OS_PROJECT_DOMAIN_NAME'],
    )
    sess = session.Session(auth=auth)

    # Pool of novaclients for redundant usage; every client is built on
    # the same keystone session.
    self.novaclients = [
        novaclient.Client(self.NOVA_API_VERSION, session=sess,
                          connection_pool=True)
        for _ in range(self.NUMBER_OF_CLIENTS)
    ]

    # Normally we use this client for non redundant API calls
    self.nova = self.novaclients[0]

    # Check that nova is available before loading the server list.
    self.nova.servers.list(detailed=False)
    self.init_servers_list()
try:
host=server.__dict__.get('OS-EXT-SRV-ATTR:host')
self.servers[host].append(server)
- app.logger.debug('get hostname=%s from server=%s' % (host, server))
+ LOG.debug('get hostname=%s from server=%s' % (host, server))
except Exception as e:
- app.logger.debug('can not get hostname from server=%s' % server)
+ LOG.error('can not get hostname from server=%s' % server)
def disable_compute_host(self, hostname):
    """Mark all known servers on ``hostname`` as error and force it down.

    Every server recorded in ``self.servers[hostname]`` is reset to the
    ``"error"`` state by its own ``ThreadedResetState`` thread, each thread
    using a different client from ``self.novaclients``.  When the host has
    more servers than there are clients, the servers are processed in
    batches of pool size: a client is handed to a new thread only after
    the previous batch has been joined, so no client is ever used by two
    threads at once and no server is left out.

    After all resets have been joined, the ``nova-compute`` service of the
    host is forced down through ``self.nova``.

    :param hostname: name of the compute host reported as down
    """
    servers = self.servers[hostname]
    pool_size = len(self.novaclients)

    if len(servers) > pool_size:
        # Report the number of servers (not the list itself, which cannot
        # be formatted with %d) and carry on in batches.
        LOG.info('%d servers in %s. Resetting in batches of %d' % (
            len(servers), hostname, pool_size))

    for start in range(0, len(servers), pool_size):
        batch = servers[start:start + pool_size]
        threads = []
        # zip() stops at the shorter input; batch never exceeds the pool,
        # so every server in the batch gets a client.
        for nova, server in zip(self.novaclients, batch):
            t = ThreadedResetState(nova, "error", server)
            t.start()
            threads.append(t)
        # Wait for the whole batch before reusing its clients.
        for t in threads:
            t.join()

    self.nova.services.force_down(hostname, 'nova-compute', True)
    LOG.info('doctor mark host(%s) down at %s' % (hostname, time.time()))
# Flask application and the single inspector instance, both created when
# the module is loaded.
app = Flask(__name__)
inspector = DoctorInspectorSample()


@app.route('/events', methods=['POST'])
def event_posted():
    """Handle a POST to /events carrying a JSON list of events.

    For each event the host name is read from ``event['details']`` and,
    when the event type is ``compute.host.down``, the inspector is asked to
    disable that compute host.  Always answers ``"OK"``.
    """
    LOG.info('event posted at %s' % time.time())
    LOG.info('inspector = %s' % inspector)
    LOG.info('received data = %s' % request.data)

    events = json.loads(request.data)
    for event in events:
        # The host name is looked up for every event, before its type is
        # examined.
        hostname = event['details']['hostname']
        if event['type'] != 'compute.host.down':
            continue
        inspector.disable_compute_host(hostname)
    return "OK"
args = get_args()
app.run(port=args.port)
+
if __name__ == '__main__':
main()