1 From dd4804fb05ad8aca51516b0112975cc91ef85a6b Mon Sep 17 00:00:00 2001
2 From: Yuan Zhou <yuan.zhou@intel.com>
3 Date: Wed, 8 Aug 2018 15:31:47 +0800
4 Subject: [PATCH 07/10] librbd: cleanup policy based promotion/eviction
6 Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
8 src/common/options.cc | 4 +
9 .../rbd_cache/CacheControllerSocketClient.hpp | 3 +-
10 src/tools/rbd_cache/ObjectCacheStore.cc | 63 +++----
11 src/tools/rbd_cache/ObjectCacheStore.h | 10 +-
12 src/tools/rbd_cache/Policy.hpp | 18 +-
13 src/tools/rbd_cache/SimplePolicy.hpp | 188 +++++++++------------
14 6 files changed, 141 insertions(+), 145 deletions(-)
16 diff --git a/src/common/options.cc b/src/common/options.cc
17 index 7839a31..b334c1e 100644
18 --- a/src/common/options.cc
19 +++ b/src/common/options.cc
20 @@ -6365,6 +6365,10 @@ static std::vector<Option> get_rbd_options() {
22 .set_description("shared ssd caching data dir"),
24 + Option("rbd_shared_cache_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
26 + .set_description("shared ssd caching data entries"),
28 Option("rbd_non_blocking_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
30 .set_description("process AIO ops from a dispatch thread to prevent blocking"),
31 diff --git a/src/tools/rbd_cache/CacheControllerSocketClient.hpp b/src/tools/rbd_cache/CacheControllerSocketClient.hpp
32 index 4e1f36c..56b79ce 100644
33 --- a/src/tools/rbd_cache/CacheControllerSocketClient.hpp
34 +++ b/src/tools/rbd_cache/CacheControllerSocketClient.hpp
35 @@ -90,7 +90,8 @@ public:
38 std::unique_lock<std::mutex> lk(m);
41 + cv.wait_for(lk, std::chrono::milliseconds(100));
45 diff --git a/src/tools/rbd_cache/ObjectCacheStore.cc b/src/tools/rbd_cache/ObjectCacheStore.cc
46 index b39fe66..99f90d6 100644
47 --- a/src/tools/rbd_cache/ObjectCacheStore.cc
48 +++ b/src/tools/rbd_cache/ObjectCacheStore.cc
49 @@ -15,7 +15,12 @@ namespace cache {
50 ObjectCacheStore::ObjectCacheStore(CephContext *cct, ContextWQ* work_queue)
51 : m_cct(cct), m_work_queue(work_queue),
52 m_rados(new librados::Rados()) {
53 - m_policy = new SimplePolicy(4096, 0.9); // TODO
55 + uint64_t object_cache_entries =
56 + cct->_conf.get_val<int64_t>("rbd_shared_cache_entries");
58 + //TODO(): allow the watermark level to be configured
59 + m_policy = new SimplePolicy(object_cache_entries, 0.5);
62 ObjectCacheStore::~ObjectCacheStore() {
63 @@ -35,7 +40,16 @@ int ObjectCacheStore::init(bool reset) {
64 lderr(m_cct) << "fail to conect to cluster" << dendl;
67 - //TODO(): check existing cache objects
69 + std::string cache_path = m_cct->_conf.get_val<std::string>("rbd_shared_cache_path");
70 + //TODO(): check and reuse existing cache objects
72 + std::string cmd = "exec rm -rf " + cache_path + "/rbd_cache*; exec mkdir -p " + cache_path;
73 + //TODO(): use std::filesystem instead of system()
74 + int r = system(cmd.c_str());
77 + evict_thd = new std::thread([this]{this->evict_thread_body();});
81 @@ -58,10 +72,6 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
83 librados::IoCtx* ioctx = m_ioctxs[pool_name];
85 - //promoting: update metadata
86 - m_policy->update_status(cache_file_name, PROMOTING);
87 - assert(PROMOTING == m_policy->get_status(cache_file_name));
89 librados::bufferlist* read_buf = new librados::bufferlist();
90 int object_size = 4096*1024; //TODO(): read config from image metadata
92 @@ -83,9 +93,9 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
93 ret = cache_file.write_object_to_file(*read_buf, object_size);
96 - assert(PROMOTING == m_policy->get_status(cache_file_name));
97 - m_policy->update_status(cache_file_name, PROMOTED);
98 - assert(PROMOTED == m_policy->get_status(cache_file_name));
99 + assert(OBJ_CACHE_PROMOTING == m_policy->get_status(cache_file_name));
100 + m_policy->update_status(cache_file_name, OBJ_CACHE_PROMOTED);
101 + assert(OBJ_CACHE_PROMOTED == m_policy->get_status(cache_file_name));
105 @@ -97,18 +107,15 @@ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_na
107 std::string cache_file_name = pool_name + object_name;
109 - // TODO lookup and return status;
112 ret = m_policy->lookup_object(cache_file_name);
116 + case OBJ_CACHE_NONE:
117 return do_promote(pool_name, object_name);
121 + case OBJ_CACHE_PROMOTED:
123 + case OBJ_CACHE_PROMOTING:
127 @@ -117,26 +124,14 @@ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_na
128 void ObjectCacheStore::evict_thread_body() {
131 - std::string temp_cache_file;
133 - ret = m_policy->evict_object(temp_cache_file);
139 - // delete temp_cache_file file.
141 - assert(EVICTING == m_policy->get_status(temp_cache_file));
143 - m_policy->update_status(temp_cache_file, EVICTED);
145 - assert(NONE == m_policy->get_status(temp_cache_file));
146 + ret = evict_objects();
151 int ObjectCacheStore::shutdown() {
152 + m_evict_go = false;
157 @@ -165,5 +160,13 @@ int ObjectCacheStore::promote_object(librados::IoCtx* ioctx, std::string object_
161 +int ObjectCacheStore::evict_objects() {
162 + std::list<std::string> obj_list;
163 + m_policy->get_evict_list(&obj_list);
164 + for (auto& obj: obj_list) {
171 diff --git a/src/tools/rbd_cache/ObjectCacheStore.h b/src/tools/rbd_cache/ObjectCacheStore.h
172 index 5118a73..ba0e1f1 100644
173 --- a/src/tools/rbd_cache/ObjectCacheStore.h
174 +++ b/src/tools/rbd_cache/ObjectCacheStore.h
176 #include "librbd/cache/SharedPersistentObjectCacherFile.h"
177 #include "SimplePolicy.hpp"
180 using librados::Rados;
181 using librados::IoCtx;
183 @@ -40,10 +41,9 @@ class ObjectCacheStore
185 int lock_cache(std::string vol_name);
187 - void evict_thread_body();
190 - int _evict_object();
191 + void evict_thread_body();
192 + int evict_objects();
194 int do_promote(std::string pool_name, std::string object_name);
196 @@ -61,8 +61,8 @@ class ObjectCacheStore
197 librbd::cache::SyncFile *m_cache_file;
202 + std::thread* evict_thd;
203 + bool m_evict_go = false;
207 diff --git a/src/tools/rbd_cache/Policy.hpp b/src/tools/rbd_cache/Policy.hpp
208 index 575c294..711e3bd 100644
209 --- a/src/tools/rbd_cache/Policy.hpp
210 +++ b/src/tools/rbd_cache/Policy.hpp
212 #ifndef RBD_CACHE_POLICY_HPP
213 #define RBD_CACHE_POLICY_HPP
227 + OBJ_CACHE_NONE = 0,
228 + OBJ_CACHE_PROMOTING,
229 + OBJ_CACHE_PROMOTED,
233 @@ -18,5 +22,9 @@ public:
234 virtual int evict_object(std::string&) = 0;
235 virtual void update_status(std::string, CACHESTATUS) = 0;
236 virtual CACHESTATUS get_status(std::string) = 0;
237 + virtual void get_evict_list(std::list<std::string>* obj_list) = 0;
240 +} // namespace cache
243 diff --git a/src/tools/rbd_cache/SimplePolicy.hpp b/src/tools/rbd_cache/SimplePolicy.hpp
244 index a0d8de7..e785de1 100644
245 --- a/src/tools/rbd_cache/SimplePolicy.hpp
246 +++ b/src/tools/rbd_cache/SimplePolicy.hpp
249 #include "Policy.hpp"
250 #include "include/lru.h"
251 +#include "common/RWLock.h"
252 #include "common/Mutex.h"
255 #include <unordered_map>
262 class SimplePolicy : public Policy {
264 - SimplePolicy(uint64_t block_num, float level)
266 - m_lock("SimplePolicy"),
267 - m_entry_count(block_num)
268 + SimplePolicy(uint64_t block_num, float watermark)
269 + : m_watermark(watermark), m_entry_count(block_num),
270 + m_cache_map_lock("rbd::cache::SimplePolicy::m_cache_map_lock"),
271 + m_free_list_lock("rbd::cache::SimplePolicy::m_free_list_lock")
274 - Entry m_entries[m_entry_count];
276 - for(auto &entry : m_entries) {
277 - m_free_lru.lru_insert_bot(&entry);
278 + for(uint64_t i = 0; i < m_entry_count; i++) {
279 + m_free_list.push_back(new Entry());
286 + for(uint64_t i = 0; i < m_entry_count; i++) {
287 + Entry* entry = reinterpret_cast<Entry*>(m_free_list.front());
289 + m_free_list.pop_front();
293 CACHESTATUS lookup_object(std::string cache_file_name) {
294 - Mutex::Locker locker(m_lock);
296 - auto entry_it = m_oid_to_entry.find(cache_file_name);
297 - if(entry_it == m_oid_to_entry.end()) {
299 + //TODO(): check race condition
300 + RWLock::WLocker wlocker(m_cache_map_lock);
302 + auto entry_it = m_cache_map.find(cache_file_name);
303 + if(entry_it == m_cache_map.end()) {
304 + Mutex::Locker locker(m_free_list_lock);
305 + Entry* entry = reinterpret_cast<Entry*>(m_free_list.front());
306 + assert(entry != nullptr);
307 + m_free_list.pop_front();
308 + entry->status = OBJ_CACHE_PROMOTING;
310 + m_cache_map[cache_file_name] = entry;
312 + return OBJ_CACHE_NONE;
315 Entry* entry = entry_it->second;
318 - if(entry->status == PROMOTED) {
319 - lru = &m_promoted_lru;
321 - lru = &m_handing_lru;
322 + if(entry->status == OBJ_CACHE_PROMOTED) {
324 + m_promoted_lru.lru_touch(entry);
328 - lru->lru_remove(entry);
329 - lru->lru_insert_top(entry);
331 return entry->status;
334 int evict_object(std::string& out_cache_file_name) {
335 - Mutex::Locker locker(m_lock);
337 - // still have enough free space, don't need to evict lru.
338 - uint64_t temp_current_size = m_oid_to_entry.size();
339 - float temp_current_evict_level = temp_current_size / m_entry_count;
340 - if(temp_current_evict_level < m_level) {
344 - // when all entries are USING, PROMOTING or EVICTING, just busy waiting.
345 - if(m_promoted_lru.lru_get_size() == 0) {
349 - assert(m_promoted_lru.lru_get_size() != 0);
351 - // evict one item from promoted lru
352 - Entry *entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_get_next_expire());
353 - assert(entry != nullptr);
355 - assert(entry->status == PROMOTED);
357 - out_cache_file_name = entry->cache_file_name;
358 - entry->status = EVICTING;
360 - m_promoted_lru.lru_remove(entry);
361 - m_handing_lru.lru_insert_top(entry);
362 + RWLock::WLocker locker(m_cache_map_lock);
367 // TODO(): simplify the logic
368 - void update_status(std::string _file_name, CACHESTATUS _status) {
369 - Mutex::Locker locker(m_lock);
370 + void update_status(std::string file_name, CACHESTATUS new_status) {
371 + RWLock::WLocker locker(m_cache_map_lock);
374 - auto entry_it = m_oid_to_entry.find(_file_name);
375 + auto entry_it = m_cache_map.find(file_name);
378 - if(_status == PROMOTING) {
379 - assert(m_oid_to_entry.find(_file_name) == m_oid_to_entry.end());
380 + if(new_status == OBJ_CACHE_PROMOTING) {
381 + assert(entry_it == m_cache_map.end());
384 - // miss this object.
385 - if(entry_it == m_oid_to_entry.end() && _status == PROMOTING) {
386 - entry = reinterpret_cast<Entry*>(m_free_lru.lru_get_next_expire());
387 - if(entry == nullptr) {
388 - assert(0); // namely evict thread have some problems.
391 - entry->status = PROMOTING;
393 - m_oid_to_entry[_file_name] = entry;
394 - m_free_lru.lru_remove(entry);
395 - m_handing_lru.lru_insert_top(entry);
400 - assert(entry_it != m_oid_to_entry.end());
401 + assert(entry_it != m_cache_map.end());
403 entry = entry_it->second;
405 - // promoting action have been finished, so update it.
406 - if(entry->status == PROMOTING && _status== PROMOTED) {
407 - m_handing_lru.lru_remove(entry);
408 + // promotion has finished, so record the new status.
409 + if(entry->status == OBJ_CACHE_PROMOTING && new_status== OBJ_CACHE_PROMOTED) {
410 m_promoted_lru.lru_insert_top(entry);
411 - entry->status = PROMOTED;
415 - // will delete this cache file
416 - if(entry->status == PROMOTED && _status == EVICTING) {
417 - m_promoted_lru.lru_remove(entry);
418 - m_handing_lru.lru_insert_top(entry);
419 - entry->status = EVICTING;
424 - if(_status == EVICTED) {
425 - m_oid_to_entry.erase(entry_it);
426 - m_handing_lru.lru_remove(entry);
427 - m_free_lru.lru_insert_bot(entry);
428 + entry->status = new_status;
432 @@ -142,39 +97,64 @@ public:
436 - CACHESTATUS get_status(std::string _file_name) {
437 - Mutex::Locker locker(m_lock);
438 - auto entry_it = m_oid_to_entry.find(_file_name);
439 - if(entry_it == m_oid_to_entry.end()) {
441 + CACHESTATUS get_status(std::string file_name) {
442 + RWLock::RLocker locker(m_cache_map_lock);
443 + auto entry_it = m_cache_map.find(file_name);
444 + if(entry_it == m_cache_map.end()) {
445 + return OBJ_CACHE_NONE;
448 return entry_it->second->status;
451 + void get_evict_list(std::list<std::string>* obj_list) {
452 + RWLock::WLocker locker(m_cache_map_lock);
453 + // check free ratio, pop entries from LRU
454 + if (m_free_list.size() / m_entry_count < m_watermark) {
455 + int evict_num = 10; //TODO(): make this configurable
456 + for(int i = 0; i < evict_num; i++) {
457 + Entry* entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_expire());
458 + if (entry == nullptr) {
461 + std::string file_name = entry->cache_file_name;
462 + obj_list->push_back(file_name);
464 + auto entry_it = m_cache_map.find(file_name);
465 + m_cache_map.erase(entry_it);
467 + //mark this entry as free
468 + entry->status = OBJ_CACHE_NONE;
469 + Mutex::Locker locker(m_free_list_lock);
470 + m_free_list.push_back(entry);
477 class Entry : public LRUObject {
480 - Entry() : status(NONE){}
481 + Entry() : status(OBJ_CACHE_NONE){}
482 std::string cache_file_name;
483 void encode(bufferlist &bl){}
484 void decode(bufferlist::iterator &it){}
487 - std::unordered_map<std::string, Entry*> m_oid_to_entry;
489 + uint64_t m_entry_count;
492 - LRU m_handing_lru; // include promoting status or evicting status
493 - LRU m_promoted_lru; // include promoted, using status.
494 + std::unordered_map<std::string, Entry*> m_cache_map;
495 + RWLock m_cache_map_lock;
497 - mutable Mutex m_lock;
498 + std::deque<Entry*> m_free_list;
499 + Mutex m_free_list_lock;
502 - uint64_t m_entry_count;
503 + LRU m_promoted_lru; // include promoted, using status.
507 +} // namespace cache