1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
20 #include "SimpleMessenger.h"
22 #include "common/config.h"
23 #include "common/Timer.h"
24 #include "common/errno.h"
25 #include "common/valgrind.h"
26 #include "auth/Crypto.h"
27 #include "include/Spinlock.h"
29 #define dout_subsys ceph_subsys_ms
31 #define dout_prefix _prefix(_dout, this)
// Log-line prefix helper, invoked through the dout_prefix macro.
// Writes "-- <messenger address> " to *_dout and returns that stream so the
// caller can keep chaining output onto it.
32 static ostream& _prefix(std::ostream *_dout, SimpleMessenger *msgr) {
33 return *_dout << "-- " << msgr->get_myaddr() << " ";
// Constructor.
// Forwards cct, name, mname and _nonce to the SimplePolicyMessenger base,
// builds the accepter (with _nonce), the dispatch queue and the loopback
// PipeConnection, and sets the initial flags: need_addr = true; did_bind,
// reaper_started and reaper_stop = false.
41 SimpleMessenger::SimpleMessenger(CephContext *cct, entity_name_t name,
42 string mname, uint64_t _nonce)
43 : SimplePolicyMessenger(cct, name,mname, _nonce),
44 accepter(this, _nonce),
45 dispatch_queue(cct, this, mname),
48 lock("SimpleMessenger::lock"), need_addr(true), did_bind(false),
51 reaper_started(false), reaper_stop(false),
53 local_connection(new PipeConnection(cct, this))
// Annotate accesses to `timeout` as a benign race for race-detection tooling.
55 ANNOTATE_BENIGN_RACE_SIZED(&timeout, sizeof(timeout),
56 "SimpleMessenger read timeout");
57 ceph_spin_init(&global_seq_lock);
// Populate the loopback connection from the current my_inst.
58 init_local_connection();
62 * Destroy the SimpleMessenger. Pretty simple since all the work is done
// Destructor.
// Asserts that teardown already happened (not bound, no registered pipes,
// reaper thread not running) and then destroys the global_seq_lock spinlock
// that the constructor initialized.
65 SimpleMessenger::~SimpleMessenger()
67 assert(!did_bind); // either we didn't bind or we shut down the Accepter
68 assert(rank_pipe.empty()); // we don't have any running Pipes.
69 assert(!reaper_started); // the reaper thread is stopped
70 ceph_spin_destroy(&global_seq_lock);
// Logs our address at debug level 10 and starts the dispatch queue.
73 void SimpleMessenger::ready()
75 ldout(cct,10) << "ready " << get_myaddr() << dendl;
76 dispatch_queue.start();
// Shuts the messenger down.
// Logs our address at debug level 10 and clears the private pointer held by
// the loopback connection so that it no longer keeps a reference cycle alive.
85 int SimpleMessenger::shutdown()
87 ldout(cct,10) << "shutdown " << get_myaddr() << dendl;
90 // break ref cycles on the loopback connection
91 local_connection->set_priv(NULL);
// Sends message m to a destination identified by entity instance (name + address).
//   m    - message to send; its header src is overwritten with our name and,
//          if it carries no priority, it receives the default send priority.
//   dest - target entity; dest.addr selects the pipe, dest.name.type() the peer type.
// A destination address equal to a default-constructed entity_addr_t is
// reported at log level 0 as an empty destination.
101 int SimpleMessenger::_send_message(Message *m, const entity_inst_t& dest)
104 m->get_header().src = get_myname();
107 if (!m->get_priority()) m->set_priority(get_default_send_priority());
109 ldout(cct,1) <<"--> " << dest.name << " "
110 << dest.addr << " -- " << *m
111 << " -- ?+" << m->get_data().length()
115 if (dest.addr == entity_addr_t()) {
116 ldout(cct,0) << "send_message message " << *m
117 << " with empty dest " << dest.addr << dendl;
// Reuse the connection of an already-registered pipe when one exists;
// otherwise pass NULL and let submit_message decide how to reach dest.addr.
// The trailing `true` is presumably submit_message's already_locked flag - confirm.
123 Pipe *pipe = _lookup_pipe(dest.addr);
124 submit_message(m, (pipe ? pipe->connection_state.get() : NULL),
125 dest.addr, dest.name.type(), true);
// Sends message m over an existing Connection.
//   m   - message to send; its header src is overwritten with our name and,
//         if it carries no priority, it receives the default send priority.
//   con - connection to use; it is downcast to PipeConnection, and its peer
//         address and peer type are forwarded to submit_message.
130 int SimpleMessenger::_send_message(Message *m, Connection *con)
133 m->get_header().src = get_myname();
135 if (!m->get_priority()) m->set_priority(get_default_send_priority());
137 ldout(cct,1) << "--> " << con->get_peer_addr()
139 << " -- ?+" << m->get_data().length()
140 << " " << m << " con " << con
// The trailing `false` is presumably submit_message's already_locked flag - confirm.
143 submit_message(m, static_cast<PipeConnection*>(con),
144 con->get_peer_addr(), con->get_peer_type(), false);
149 * If my_inst.addr doesn't have an IP set, this function
150 * will fill it in from the passed addr. Otherwise it does nothing and returns.
// Fills in our address from `addr` when our own IP is still blank.
// The address storage (u) is copied from addr, but the port we already had is
// preserved; afterwards the loopback connection is refreshed to match.
152 void SimpleMessenger::set_addr_unknowns(const entity_addr_t &addr)
154 if (my_inst.addr.is_blank_ip()) {
// Remember our port: copying addr.u below would otherwise overwrite it.
155 int port = my_inst.addr.get_port();
156 my_inst.addr.u = addr.u;
157 my_inst.addr.set_port(port);
158 init_local_connection();
// Sets our address from `addr`.
// Works on a local copy (t) of the argument and then refreshes the loopback
// connection.
162 void SimpleMessenger::set_addr(const entity_addr_t &addr)
164 entity_addr_t t = addr;
167 init_local_connection();
// Chooses the protocol version to use with a peer.
//   peer_type - entity type of the remote side.
//   connect   - NOTE(review): presumably selects between the two case lists
//               below (outgoing vs. incoming) - confirm.
// A peer of our own entity type gets cluster_protocol; otherwise one of the
// per-service client protocol constants (OSD / MDS / MON) is returned.
170 int SimpleMessenger::get_proto_version(int peer_type, bool connect)
172 int my_type = my_inst.name.type();
174 // set reply protocol version
175 if (peer_type == my_type) {
177 return cluster_protocol;
182 case CEPH_ENTITY_TYPE_OSD: return CEPH_OSDC_PROTOCOL;
183 case CEPH_ENTITY_TYPE_MDS: return CEPH_MDSC_PROTOCOL;
184 case CEPH_ENTITY_TYPE_MON: return CEPH_MONC_PROTOCOL;
188 case CEPH_ENTITY_TYPE_OSD: return CEPH_OSDC_PROTOCOL;
189 case CEPH_ENTITY_TYPE_MDS: return CEPH_MDSC_PROTOCOL;
190 case CEPH_ENTITY_TYPE_MON: return CEPH_MONC_PROTOCOL;
203 /********************************************
207 #define dout_prefix _prefix(_dout, this)
// Main loop of the reaper thread.
// Until reaper_stop is set, runs one reaper() pass and then sleeps on
// reaper_cond (with `lock`) until it is signalled again.
209 void SimpleMessenger::reaper_entry()
211 ldout(cct,10) << "reaper_entry start" << dendl;
213 while (!reaper_stop) {
214 reaper(); // may drop and retake the lock
217 reaper_cond.Wait(lock);
220 ldout(cct,10) << "reaper_entry done" << dendl;
224 * note: assumes lock is held
// Drains pipe_reap_queue, tearing down each queued Pipe.
// Precondition (asserted): the messenger `lock` is held by the caller.
// For every pipe: discard its outgoing queue, detach it from its Connection
// if still attached, unregister it, and check that it is tracked in `pipes`.
226 void SimpleMessenger::reaper()
228 ldout(cct,10) << "reaper" << dendl;
229 assert(lock.is_locked());
231 while (!pipe_reap_queue.empty()) {
232 Pipe *p = pipe_reap_queue.front();
233 pipe_reap_queue.pop_front();
234 ldout(cct,10) << "reaper reaping pipe " << p << " " <<
235 p->get_peer_addr() << dendl;
237 p->discard_out_queue();
238 if (p->connection_state) {
239 // mark_down, mark_down_all, or fault() should have done this,
240 // or accept() may have switched the Connection to a different
241 // Pipe... but make sure!
242 bool cleared = p->connection_state->clear_pipe(p);
245 p->pipe_lock.Unlock();
246 p->unregister_pipe();
// Every pipe handed to the reaper must still be tracked in `pipes`.
247 assert(pipes.count(p));
250 // drop msgr lock while joining thread; the delay through could be
251 // trying to fast dispatch, preventing it from joining without
252 // blocking and deadlocking.
259 ldout(cct,10) << "reaper reaped pipe " << p << " " << p->get_peer_addr() << dendl;
261 ldout(cct,10) << "reaper deleted pipe " << p << dendl;
263 ldout(cct,10) << "reaper done" << dendl;
// Hands `pipe` to the reaper: appends it to pipe_reap_queue and signals
// reaper_cond so a waiting reaper pass can pick it up.
266 void SimpleMessenger::queue_reap(Pipe *pipe)
268 ldout(cct,10) << "queue_reap " << pipe << dendl;
270 pipe_reap_queue.push_back(pipe);
271 reaper_cond.Signal();
// Reports whether the pipe behind `con` is connected.
// `con` is treated as a PipeConnection; its Pipe is fetched with get_pipe(),
// checked (by assert) to belong to this messenger, and queried via
// Pipe::is_connected().
275 bool SimpleMessenger::is_connected(Connection *con)
279 Pipe *p = static_cast<Pipe *>(static_cast<PipeConnection*>(con)->get_pipe());
281 assert(p->msgr == this);
282 r = p->is_connected();
// Binds the messenger to bind_addr.
// The actual work is delegated to accepter.bind(), called with an empty set
// of ports to avoid; its result is captured in r.
289 int SimpleMessenger::bind(const entity_addr_t &bind_addr)
293 ldout(cct,10) << "rank.bind already started" << dendl;
297 ldout(cct,10) << "rank.bind " << bind_addr << dendl;
// A first-time bind has no ports it needs to steer clear of.
301 set<int> avoid_ports;
302 int r = accepter.bind(bind_addr, avoid_ports);
// Re-binds the messenger, avoiding the given ports.
// Logs the avoid set at level 1 and returns the result of accepter.rebind().
308 int SimpleMessenger::rebind(const set<int>& avoid_ports)
310 ldout(cct,1) << "rebind avoid " << avoid_ports << dendl;
314 return accepter.rebind(avoid_ports);
// Client-side bind: records bind_addr as our address without listening.
// Behaviour is gated on the ms_bind_before_connect configuration option.
// The remainder runs with the messenger `lock` held (scoped Mutex::Locker).
318 int SimpleMessenger::client_bind(const entity_addr_t &bind_addr)
320 if (!cct->_conf->ms_bind_before_connect)
322 Mutex::Locker l(lock);
324 assert(my_inst.addr == bind_addr);
328 ldout(cct,10) << "rank.bind already started" << dendl;
331 ldout(cct,10) << "rank.bind " << bind_addr << dendl;
333 set_myaddr(bind_addr);
// Starts the messenger.
// Requires (by assert) that an entity name type has been set. Stamps our
// nonce into my_inst.addr, refreshes the loopback connection, then marks the
// reaper as started and spawns its thread under the name "ms_reaper".
338 int SimpleMessenger::start()
341 ldout(cct,1) << "messenger.start" << dendl;
343 // register at least one entity, first!
344 assert(my_inst.name.type() >= 0);
351 my_inst.addr.nonce = nonce;
352 init_local_connection();
// Set the flag before creating the thread; wait() and the destructor consult it.
357 reaper_started = true;
358 reaper_thread.create("ms_reaper");
// Creates a Pipe for an incoming connection.
//   sd - NOTE(review): presumably the accepted socket descriptor - confirm.
// The pipe starts in STATE_ACCEPTING with no Connection attached and is
// tracked in accepting_pipes.
362 Pipe *SimpleMessenger::add_accept_pipe(int sd)
365 Pipe *p = new Pipe(this, Pipe::STATE_ACCEPTING, NULL);
369 p->pipe_lock.Unlock();
371 accepting_pipes.insert(p);
377 * NOTE: assumes messenger.lock held.
// Creates and registers a new outgoing Pipe to `addr`.
// Preconditions (asserted): the messenger `lock` is held and addr is not our
// own address.
// The pipe is created in STATE_CONNECTING and bound to `con`; under its
// pipe_lock it receives the peer type, peer address and the policy for that
// peer type, and its writer is started. The lock is then released and the
// pipe is registered.
379 Pipe *SimpleMessenger::connect_rank(const entity_addr_t& addr,
384 assert(lock.is_locked());
385 assert(addr != my_inst.addr);
387 ldout(cct,10) << "connect_rank to " << addr << ", creating pipe and registering" << dendl;
390 Pipe *pipe = new Pipe(this, Pipe::STATE_CONNECTING,
391 static_cast<PipeConnection*>(con));
392 pipe->pipe_lock.Lock();
393 pipe->set_peer_type(type);
394 pipe->set_peer_addr(addr);
395 pipe->policy = get_policy(type);
396 pipe->start_writer();
399 pipe->pipe_lock.Unlock();
400 pipe->register_pipe();
// Obtains an authorizer for talking to a peer of peer_type.
// Thin wrapper: forwards both arguments to ms_deliver_get_authorizer() and
// returns its result.
411 AuthAuthorizer *SimpleMessenger::get_authorizer(int peer_type, bool force_new)
413 return ms_deliver_get_authorizer(peer_type, force_new);
// Verifies an authorizer presented on `con`.
// Thin wrapper: forwards every argument unchanged to
// ms_deliver_verify_authorizer() and returns its result. authorizer_reply,
// isvalid and session_key are passed by non-const reference.
416 bool SimpleMessenger::verify_authorizer(Connection *con, int peer_type,
417 int protocol, bufferlist& authorizer, bufferlist& authorizer_reply,
418 bool& isvalid,CryptoKey& session_key)
420 return ms_deliver_verify_authorizer(con, peer_type, protocol, authorizer, authorizer_reply, isvalid,session_key);
// Returns a Connection to `dest`, creating a pipe if none exists.
// Runs with the messenger `lock` held (scoped Mutex::Locker).
//  - If dest is our own address, the loopback connection is returned.
//  - Otherwise an existing pipe for dest.addr is looked up, or a new one is
//    created with connect_rank(); the pipe's connection_state is returned if
//    it is set when examined under pipe_lock.
423 ConnectionRef SimpleMessenger::get_connection(const entity_inst_t& dest)
425 Mutex::Locker l(lock);
426 if (my_inst.addr == dest.addr) {
428 return local_connection;
433 Pipe *pipe = _lookup_pipe(dest.addr);
435 ldout(cct, 10) << "get_connection " << dest << " existing " << pipe << dendl;
437 pipe = connect_rank(dest.addr, dest.name.type(), NULL, NULL);
438 ldout(cct, 10) << "get_connection " << dest << " new " << pipe << dendl;
// This inner Locker guards the pipe, not the messenger; it shadows the outer `l`.
440 Mutex::Locker l(pipe->pipe_lock);
441 if (pipe->connection_state)
442 return pipe->connection_state;
443 // we failed too quickly! retry. FIXME.
// Returns the messenger's loopback (local) connection.
447 ConnectionRef SimpleMessenger::get_loopback_connection()
449 return local_connection;
// Routes message m toward its destination.
//   m         - message to deliver.
//   con       - existing connection to use, if any.
//   dest_addr - destination address; equal to my_inst.addr for local delivery.
//   dest_type - destination entity type, used to look up the Policy.
// Paths, in order:
//   1. Existing connection: obtain its Pipe with try_get_pipe(). A pipe that
//      is not in STATE_CLOSED is logged as "have pipe"; a closed one causes
//      the connection's current pipe to be re-fetched and compared.
//   2. Local destination: attach the loopback connection and hand the message
//      to the dispatch queue for local delivery at its own priority.
//   3. Remote destination with no pipe: consult the Policy for dest_type;
//      either drop (lossy server, no session) or open a new pipe with
//      connect_rank(). When the messenger lock is not yet held
//      (!already_locked), take it and re-enter with already_locked = true.
452 void SimpleMessenger::submit_message(Message *m, PipeConnection *con,
453 const entity_addr_t& dest_addr, int dest_type,
456 m->trace.event("simple submitting message");
// Optional debugging aid: hex-dump payload (and data, if any) at log level 0.
457 if (cct->_conf->ms_dump_on_send) {
459 ldout(cct, 0) << "submit_message " << *m << "\n";
460 m->get_payload().hexdump(*_dout);
461 if (m->get_data().length() > 0) {
462 *_dout << " data:\n";
463 m->get_data().hexdump(*_dout);
469 // existing connection?
472 bool ok = static_cast<PipeConnection*>(con)->try_get_pipe(&pipe);
474 ldout(cct,0) << "submit_message " << *m << " remote, " << dest_addr
475 << ", failed lossy con, dropping message " << m << dendl;
480 // we loop in case of a racing reconnect, either from us or them
481 pipe->pipe_lock.Lock(); // can't use a Locker because of the Pipe ref
482 if (pipe->state != Pipe::STATE_CLOSED) {
483 ldout(cct,20) << "submit_message " << *m << " remote, " << dest_addr << ", have pipe." << dendl;
485 pipe->pipe_lock.Unlock();
// The pipe we held is closed: re-read the connection's current pipe so a
// racing reconnect (a different Pipe object) can be detected below.
490 ok = con->try_get_pipe(&current_pipe);
491 pipe->pipe_lock.Unlock();
492 if (current_pipe == pipe) {
493 ldout(cct,20) << "submit_message " << *m << " remote, " << dest_addr
494 << ", had pipe " << pipe << ", but it closed." << dendl;
507 if (my_inst.addr == dest_addr) {
509 ldout(cct,20) << "submit_message " << *m << " local" << dendl;
510 m->set_connection(local_connection.get());
511 dispatch_queue.local_delivery(m, m->get_priority());
515 // remote, no existing pipe.
516 const Policy& policy = get_policy(dest_type);
518 ldout(cct,20) << "submit_message " << *m << " remote, " << dest_addr << ", lossy server for target type "
519 << ceph_entity_type_name(dest_type) << ", no session, dropping." << dendl;
522 ldout(cct,20) << "submit_message " << *m << " remote, " << dest_addr << ", new pipe." << dendl;
523 if (!already_locked) {
524 /** We couldn't handle the Message without reference to global data, so
525 * grab the lock and do it again. If we got here, we know it's a non-lossy
526 * Connection, so we can use our existing pointer without doing another lookup. */
527 Mutex::Locker l(lock);
528 submit_message(m, con, dest_addr, dest_type, true);
530 connect_rank(dest_addr, dest_type, static_cast<PipeConnection*>(con), m);
// Sends a keepalive on `con`.
// `con` is treated as a PipeConnection and its Pipe is fetched with
// get_pipe(). With a pipe (asserted to belong to this messenger), the
// keepalive is sent while holding pipe_lock; without one, the situation is
// logged at level 0.
535 int SimpleMessenger::send_keepalive(Connection *con)
538 Pipe *pipe = static_cast<Pipe *>(
539 static_cast<PipeConnection*>(con)->get_pipe());
541 ldout(cct,20) << "send_keepalive con " << con << ", have pipe." << dendl;
542 assert(pipe->msgr == this);
543 pipe->pipe_lock.Lock();
544 pipe->_send_keepalive();
545 pipe->pipe_lock.Unlock();
548 ldout(cct,0) << "send_keepalive con " << con << ", no pipe." << dendl;
// Blocks until the messenger is stopped, then tears everything down.
// Phases, in order:
//   1. wait on stop_cond (with `lock`);
//   2. stop the accepter thread;
//   3. shut down the dispatch queue and, if it was started, wait for it and
//      discard locally queued messages;
//   4. if the reaper was started, wake it, join its thread and clear
//      reaper_started;
//   5. unregister every pipe in rank_pipe, then wait on reaper_cond until
//      `pipes` is empty.
556 void SimpleMessenger::wait()
564 stop_cond.Wait(lock);
570 ldout(cct,20) << "wait: stopping accepter thread" << dendl;
573 ldout(cct,20) << "wait: stopped accepter thread" << dendl;
576 dispatch_queue.shutdown();
577 if (dispatch_queue.is_started()) {
578 ldout(cct,10) << "wait: waiting for dispatch queue" << dendl;
579 dispatch_queue.wait();
580 dispatch_queue.discard_local();
581 ldout(cct,10) << "wait: dispatch queue is stopped" << dendl;
584 if (reaper_started) {
585 ldout(cct,20) << "wait: stopping reaper thread" << dendl;
// Wake the reaper so it can leave its wait in reaper_entry().
587 reaper_cond.Signal();
590 reaper_thread.join();
// The destructor asserts !reaper_started, so record that the thread is gone.
591 reaper_started = false;
592 ldout(cct,20) << "wait: stopped reaper thread" << dendl;
595 // close+reap all pipes
598 ldout(cct,10) << "wait: closing pipes" << dendl;
600 while (!rank_pipe.empty()) {
601 Pipe *p = rank_pipe.begin()->second;
602 p->unregister_pipe();
605 // don't generate an event here; we're shutting down anyway.
606 PipeConnectionRef con = p->connection_state;
609 p->pipe_lock.Unlock();
613 ldout(cct,10) << "wait: waiting for pipes " << pipes << " to close" << dendl;
614 while (!pipes.empty()) {
615 reaper_cond.Wait(lock);
621 ldout(cct,10) << "wait: done." << dendl;
622 ldout(cct,1) << "shutdown complete." << dendl;
// Marks every pipe down.
// First walks accepting_pipes, then drains rank_pipe (unregistering each
// pipe). In both passes, a pipe that is still attached to its Connection is
// detached with clear_pipe(); when that succeeds a reset event for the
// Connection is queued on the dispatch queue.
627 void SimpleMessenger::mark_down_all()
629 ldout(cct,1) << "mark_down_all" << dendl;
631 for (set<Pipe*>::iterator q = accepting_pipes.begin(); q != accepting_pipes.end(); ++q) {
633 ldout(cct,5) << "mark_down_all accepting_pipe " << p << dendl;
636 PipeConnectionRef con = p->connection_state;
// Only queue a reset if this pipe was the one attached to the Connection.
637 if (con && con->clear_pipe(p))
638 dispatch_queue.queue_reset(con.get());
639 p->pipe_lock.Unlock();
641 accepting_pipes.clear();
// Each iteration unregisters the pipe at begin(), so this loop runs until
// rank_pipe is empty.
643 while (!rank_pipe.empty()) {
644 ceph::unordered_map<entity_addr_t,Pipe*>::iterator it = rank_pipe.begin();
645 Pipe *p = it->second;
646 ldout(cct,5) << "mark_down_all " << it->first << " " << p << dendl;
648 p->unregister_pipe();
651 PipeConnectionRef con = p->connection_state;
652 if (con && con->clear_pipe(p))
653 dispatch_queue.queue_reset(con.get());
654 p->pipe_lock.Unlock();
// Marks down the pipe registered for `addr` (address-based interface).
// When a pipe exists it is unregistered and, if still attached to a
// Connection, detached with clear_pipe(); a successful detach queues a reset
// event for that Connection. When no pipe exists, that is logged at level 1.
659 void SimpleMessenger::mark_down(const entity_addr_t& addr)
662 Pipe *p = _lookup_pipe(addr);
664 ldout(cct,1) << "mark_down " << addr << " -- " << p << dendl;
665 p->unregister_pipe();
668 if (p->connection_state) {
669 // generate a reset event for the caller in this case, even
670 // though they asked for it, since this is the addr-based (and
671 // not Connection* based) interface
672 PipeConnectionRef con = p->connection_state;
673 if (con && con->clear_pipe(p))
674 dispatch_queue.queue_reset(con.get());
676 p->pipe_lock.Unlock();
678 ldout(cct,1) << "mark_down " << addr << " -- pipe dne" << dendl;
// Marks down the pipe behind `con` (Connection-based interface).
// When a pipe exists (asserted to belong to this messenger) it is
// unregistered and detached from its Connection. Unlike the address-based
// overload, no reset event is queued. When no pipe exists, that is logged at
// level 1.
683 void SimpleMessenger::mark_down(Connection *con)
688 Pipe *p = static_cast<Pipe *>(static_cast<PipeConnection*>(con)->get_pipe());
690 ldout(cct,1) << "mark_down " << con << " -- " << p << dendl;
691 assert(p->msgr == this);
692 p->unregister_pipe();
695 if (p->connection_state) {
696 // do not generate a reset event for the caller in this case,
697 // since they asked for it.
698 p->connection_state->clear_pipe(p);
700 p->pipe_lock.Unlock();
703 ldout(cct,1) << "mark_down " << con << " -- pipe dne" << dendl;
// Marks the pipe behind `con` as disposable by switching its policy to lossy.
// When a pipe exists it is asserted to belong to this messenger before the
// policy is changed; when none exists, that is logged at level 1.
708 void SimpleMessenger::mark_disposable(Connection *con)
711 Pipe *p = static_cast<Pipe *>(static_cast<PipeConnection*>(con)->get_pipe());
713 ldout(cct,1) << "mark_disposable " << con << " -- " << p << dendl;
714 assert(p->msgr == this);
716 p->policy.lossy = true;
717 p->pipe_lock.Unlock();
720 ldout(cct,1) << "mark_disposable " << con << " -- pipe dne" << dendl;
// Learns our own address from what a peer reports seeing.
//   peer_addr_for_me - our address as observed by the peer.
// A candidate address is built from the peer-reported one while keeping the
// port and nonce we already have. The learned address is logged at level 1
// and the loopback connection is refreshed.
725 void SimpleMessenger::learned_addr(const entity_addr_t &peer_addr_for_me)
727 // be careful here: multiple threads may block here, and readers of
728 // my_inst.addr do NOT hold any lock.
730 // this always goes from true -> false under the protection of the
731 // mutex. if it is already false, we need not retake the mutex at
738 entity_addr_t t = peer_addr_for_me;
// The peer cannot know our local port or nonce; keep the ones we already have.
739 t.set_port(my_inst.addr.get_port());
740 t.set_nonce(my_inst.addr.get_nonce());
// Readers of my_inst.addr take no lock (see above); annotate the race as benign.
741 ANNOTATE_BENIGN_RACE_SIZED(&my_inst.addr, sizeof(my_inst.addr),
742 "SimpleMessenger learned addr");
744 ldout(cct,1) << "learned my addr " << my_inst.addr << dendl;
746 init_local_connection();
// (Re)initializes the loopback connection so that it describes ourselves:
// peer address = our address, peer type = our entity type, all features
// enabled. It then announces the connection through
// ms_deliver_handle_fast_connect().
751 void SimpleMessenger::init_local_connection()
753 local_connection->peer_addr = my_inst.addr;
754 local_connection->peer_type = my_inst.name.type();
755 local_connection->set_features(CEPH_FEATURES_ALL);
756 ms_deliver_handle_fast_connect(local_connection.get());