src/ceph/src/rgw/rgw_realm_reloader.cc

   1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
   2 // vim: ts=8 sw=2 smarttab
   3
   4 #include "rgw_realm_reloader.h"
   5 #include "rgw_rados.h"
   6
   7 #include "rgw_bucket.h"
   8 #include "rgw_log.h"
   9 #include "rgw_rest.h"
  10 #include "rgw_user.h"
  11
  12 #include "common/errno.h"
  13
  14 #define dout_subsys ceph_subsys_rgw
  15
  16 #undef dout_prefix
  17 #define dout_prefix (*_dout << "rgw realm reloader: ")
  18
  19
  20 // safe callbacks from SafeTimer are unneccessary. reload() can take a long
  21 // time, so we don't want to hold the mutex and block handle_notify() for the
  22 // duration
  23 static constexpr bool USE_SAFE_TIMER_CALLBACKS = false;
  24
  25
  26 RGWRealmReloader::RGWRealmReloader(RGWRados*& store, std::map<std::string, std::string>& service_map_meta,
  27                                    Pauser* frontends)
  28   : store(store),
  29     service_map_meta(service_map_meta),
  30     frontends(frontends),
  31     timer(store->ctx(), mutex, USE_SAFE_TIMER_CALLBACKS),
  32     mutex("RGWRealmReloader"),
  33     reload_scheduled(nullptr)
  34 {
  35   timer.init();
  36 }
  37
  38 RGWRealmReloader::~RGWRealmReloader()
  39 {
  40   Mutex::Locker lock(mutex);
  41   timer.shutdown();
  42 }
  43
  44 class RGWRealmReloader::C_Reload : public Context {
  45   RGWRealmReloader* reloader;
  46  public:
  47   C_Reload(RGWRealmReloader* reloader) : reloader(reloader) {}
  48   void finish(int r) override { reloader->reload(); }
  49 };
  50
  51 void RGWRealmReloader::handle_notify(RGWRealmNotify type,
  52                                      bufferlist::iterator& p)
  53 {
  54   if (!store) {
  55     /* we're in the middle of reload */
  56     return;
  57   }
  58
  59   CephContext *const cct = store->ctx();
  60
  61   Mutex::Locker lock(mutex);
  62   if (reload_scheduled) {
  63     ldout(cct, 4) << "Notification on realm, reconfiguration "
  64         "already scheduled" << dendl;
  65     return;
  66   }
  67
  68   reload_scheduled = new C_Reload(this);
  69   cond.SignalOne(); // wake reload() if it blocked on a bad configuration
  70
  71   // schedule reload() without delay
  72   timer.add_event_after(0, reload_scheduled);
  73
  74   ldout(cct, 4) << "Notification on realm, reconfiguration scheduled" << dendl;
  75 }
  76
  77 void RGWRealmReloader::reload()
  78 {
  79   CephContext *const cct = store->ctx();
  80   ldout(cct, 1) << "Pausing frontends for realm update..." << dendl;
  81
  82   frontends->pause();
  83
  84   ldout(cct, 1) << "Frontends paused" << dendl;
  85
  86   // TODO: make RGWRados responsible for rgw_log_usage lifetime
  87   rgw_log_usage_finalize();
  88
  89   // destroy the existing store
  90   RGWStoreManager::close_storage(store);
  91   store = nullptr;
  92
  93   ldout(cct, 1) << "Store closed" << dendl;
  94   {
  95     // allow a new notify to reschedule us. it's important that we do this
  96     // before we start loading the new realm, or we could miss some updates
  97     Mutex::Locker lock(mutex);
  98     reload_scheduled = nullptr;
  99   }
 100
 101   while (!store) {
 102     // recreate and initialize a new store
 103     store = RGWStoreManager::get_storage(cct,
 104                                          cct->_conf->rgw_enable_gc_threads,
 105                                          cct->_conf->rgw_enable_lc_threads,
 106                                          cct->_conf->rgw_enable_quota_threads,
 107                                          cct->_conf->rgw_run_sync_thread,
 108                                          cct->_conf->rgw_dynamic_resharding);
 109
 110     ldout(cct, 1) << "Creating new store" << dendl;
 111
 112     RGWRados* store_cleanup = nullptr;
 113     {
 114       Mutex::Locker lock(mutex);
 115
 116       // failure to recreate RGWRados is not a recoverable error, but we
 117       // don't want to assert or abort the entire cluster.  instead, just
 118       // sleep until we get another notification, and retry until we get
 119       // a working configuration
 120       if (store == nullptr) {
 121         lderr(cct) << "Failed to reinitialize RGWRados after a realm "
 122             "configuration update. Waiting for a new update." << dendl;
 123
 124         // sleep until another event is scheduled
 125         while (!reload_scheduled)
 126           cond.Wait(mutex);
 127
 128         ldout(cct, 1) << "Woke up with a new configuration, retrying "
 129             "RGWRados initialization." << dendl;
 130       }
 131
 132       if (reload_scheduled) {
 133         // cancel the event; we'll handle it now
 134         timer.cancel_event(reload_scheduled);
 135         reload_scheduled = nullptr;
 136
 137         // if we successfully created a store, clean it up outside of the lock,
 138         // then continue to loop and recreate another
 139         std::swap(store, store_cleanup);
 140       }
 141     }
 142
 143     if (store_cleanup) {
 144       ldout(cct, 4) << "Got another notification, restarting RGWRados "
 145           "initialization." << dendl;
 146
 147       RGWStoreManager::close_storage(store_cleanup);
 148     }
 149   }
 150
 151   int r = store->register_to_service_map("rgw", service_map_meta);
 152   if (r < 0) {
 153     lderr(cct) << "ERROR: failed to register to service map: " << cpp_strerror(-r) << dendl;
 154
 155     /* ignore error */
 156   }
 157
 158   ldout(cct, 1) << "Finishing initialization of new store" << dendl;
 159   // finish initializing the new store
 160   ldout(cct, 1) << " - REST subsystem init" << dendl;
 161   rgw_rest_init(cct, store, store->get_zonegroup());
 162   ldout(cct, 1) << " - user subsystem init" << dendl;
 163   rgw_user_init(store);
 164   ldout(cct, 1) << " - user subsystem init" << dendl;
 165   rgw_bucket_init(store->meta_mgr);
 166   ldout(cct, 1) << " - usage subsystem init" << dendl;
 167   rgw_log_usage_init(cct, store);
 168
 169   ldout(cct, 1) << "Resuming frontends with new realm configuration." << dendl;
 170
 171   frontends->resume(store);
 172 }