complete the release-notes
[stor4nfv.git] / src / ceph / 0006-librbd-LRU-policy-based-eviction.patch
1 From b233d6540160c8bc5cc25b870c2140fa48776fa6 Mon Sep 17 00:00:00 2001
2 From: Dehao Shang <dehao.shang@intel.com>
3 Date: Mon, 6 Aug 2018 22:42:38 +0800
4 Subject: [PATCH 06/10] librbd: LRU policy based eviction
5
6 Signed-off-by: Dehao Shang <dehao.shang@intel.com>
7 Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
8 ---
9  src/tools/rbd_cache/ObjectCacheStore.cc |  73 ++++++++-----
10  src/tools/rbd_cache/ObjectCacheStore.h  |  14 +--
11  src/tools/rbd_cache/Policy.hpp          |  22 ++++
12  src/tools/rbd_cache/SimplePolicy.hpp    | 180 ++++++++++++++++++++++++++++++++
13  4 files changed, 254 insertions(+), 35 deletions(-)
14  create mode 100644 src/tools/rbd_cache/Policy.hpp
15  create mode 100644 src/tools/rbd_cache/SimplePolicy.hpp
16
17 diff --git a/src/tools/rbd_cache/ObjectCacheStore.cc b/src/tools/rbd_cache/ObjectCacheStore.cc
18 index 2a87469..b39fe66 100644
19 --- a/src/tools/rbd_cache/ObjectCacheStore.cc
20 +++ b/src/tools/rbd_cache/ObjectCacheStore.cc
21 @@ -14,12 +14,12 @@ namespace cache {
22  
23  ObjectCacheStore::ObjectCacheStore(CephContext *cct, ContextWQ* work_queue)
24        : m_cct(cct), m_work_queue(work_queue),
25 -        m_cache_table_lock("rbd::cache::ObjectCacheStore"),
26          m_rados(new librados::Rados()) {
27 +  m_policy = new SimplePolicy(4096, 0.9); // TODO(): read the entry count (4096) and eviction level (0.9) from config
28  }
29  
30  ObjectCacheStore::~ObjectCacheStore() {
31 -
32 +  delete m_policy; // allocated with new in the constructor
33  }
34  
35  int ObjectCacheStore::init(bool reset) {
36 @@ -43,6 +43,7 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
37    int ret = 0;
38    std::string cache_file_name =  pool_name + object_name;
39  
40 +  //TODO(): lock on ioctx map
41    if (m_ioctxs.find(pool_name) == m_ioctxs.end()) {
42      librados::IoCtx* io_ctx = new librados::IoCtx();
43      ret = m_rados->ioctx_create(pool_name.c_str(), *io_ctx);
44 @@ -58,10 +59,8 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
45    librados::IoCtx* ioctx = m_ioctxs[pool_name]; 
46  
47    //promoting: update metadata 
48 -  {
49 -    Mutex::Locker locker(m_cache_table_lock);
50 -    m_cache_table.emplace(cache_file_name, PROMOTING);
51 -  }
52 +  m_policy->update_status(cache_file_name, PROMOTING);
53 +  assert(PROMOTING == m_policy->get_status(cache_file_name));
54  
55    librados::bufferlist* read_buf = new librados::bufferlist();
56    int object_size = 4096*1024; //TODO(): read config from image metadata
57 @@ -83,42 +82,60 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
58    cache_file.open();
59    ret = cache_file.write_object_to_file(*read_buf, object_size);
60    
61 -  assert(m_cache_table.find(cache_file_name) != m_cache_table.end()); 
62 -
63    // update metadata
64 -  {
65 -    Mutex::Locker locker(m_cache_table_lock);
66 -    m_cache_table.emplace(cache_file_name, PROMOTED);
67 -  }
68 +  assert(PROMOTING == m_policy->get_status(cache_file_name));
69 +  m_policy->update_status(cache_file_name, PROMOTED);
70 +  assert(PROMOTED == m_policy->get_status(cache_file_name));
71  
72    return ret;
73  
74  }
75   
76 +// Returns -1 when the client needs to read the data from the cluster.
77 +// Returns 0 when the client can read the data directly from the cache.
78  int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_name) {
79  
80    std::string cache_file_name =  pool_name + object_name;
81 -  {
82 -    Mutex::Locker locker(m_cache_table_lock);
83 -
84 -    auto it = m_cache_table.find(cache_file_name);
85 -    if (it != m_cache_table.end()) {
86 -
87 -      if (it->second == PROMOTING) {
88 -        return -1;
89 -      } else if (it->second == PROMOTED) {
90 -        return 0;
91 -      } else {
92 -        assert(0);
93 -      }
94 -    }
95 +  // TODO(): lookup and return status.
96 +  // Until then: PROMOTED maps to 0, an untracked object (NONE) is promoted
97 +  // right here and do_promote()'s result is returned, the rest maps to -1.
98 +  CACHESTATUS ret;
99 +  ret = m_policy->lookup_object(cache_file_name);
100 +
101 +  switch(ret) {
102 +    case NONE:
103 +      return do_promote(pool_name, object_name);
104 +    case PROMOTING:
105 +      return -1;
106 +    case PROMOTED:
107 +      return 0;
108 +    default:
109 +      return -1;
110    }
111 +}
112  
113 -  int ret = do_promote(pool_name, object_name);
114 +void ObjectCacheStore::evict_thread_body() { // asks the policy for objects to evict while m_evict_go is true
115 +  int ret;
116 +  while(m_evict_go) {
117 +    std::string temp_cache_file;
118  
119 -  return ret;
120 +    ret = m_policy->evict_object(temp_cache_file);
121 +    if(ret == 0) {
122 +      continue; // nothing to evict; NOTE(review): loops again without waiting
123 +    }
124 +
125 +    // TODO
126 +    // delete temp_cache_file file.
127 +    // The policy is expected to have marked the chosen object as EVICTING.
128 +    assert(EVICTING == m_policy->get_status(temp_cache_file));
129 +    // Reporting EVICTED makes the policy drop the object, hence NONE below.
130 +    m_policy->update_status(temp_cache_file, EVICTED);
131 +
132 +    assert(NONE == m_policy->get_status(temp_cache_file));
133 +  }
134  }
135  
136 +
137  int ObjectCacheStore::shutdown() {
138    m_rados->shutdown();
139    return 0;
140 diff --git a/src/tools/rbd_cache/ObjectCacheStore.h b/src/tools/rbd_cache/ObjectCacheStore.h
141 index db09efa..5118a73 100644
142 --- a/src/tools/rbd_cache/ObjectCacheStore.h
143 +++ b/src/tools/rbd_cache/ObjectCacheStore.h
144 @@ -13,6 +13,7 @@
145  #include "librbd/ImageCtx.h"
146  #include "librbd/ImageState.h"
147  #include "librbd/cache/SharedPersistentObjectCacherFile.h"
148 +#include "SimplePolicy.hpp"
149  
150  using librados::Rados;
151  using librados::IoCtx;
152 @@ -39,6 +40,8 @@ class ObjectCacheStore
153  
154      int lock_cache(std::string vol_name);
155  
156 +    void evict_thread_body();
157 +
158    private:
159      int _evict_object();
160  
161 @@ -48,21 +51,18 @@ class ObjectCacheStore
162                         librados::bufferlist* read_buf,
163                         uint64_t length);
164  
165 -    enum {
166 -      PROMOTING = 0, 
167 -      PROMOTED, 
168 -    };
169 -
170      CephContext *m_cct;
171      ContextWQ* m_work_queue;
172 -    Mutex m_cache_table_lock;
173      RadosRef m_rados;
174  
175 -    std::map<std::string, uint8_t> m_cache_table;
176  
177      std::map<std::string, librados::IoCtx*> m_ioctxs;
178  
179      librbd::cache::SyncFile *m_cache_file;
180 +
181 +    Policy* m_policy;
182 +
183 +    bool m_evict_go;
184  };
185  
186  } // namespace rbd
187 diff --git a/src/tools/rbd_cache/Policy.hpp b/src/tools/rbd_cache/Policy.hpp
188 new file mode 100644
189 index 0000000..575c294
190 --- /dev/null
191 +++ b/src/tools/rbd_cache/Policy.hpp
192 @@ -0,0 +1,22 @@
193 +#ifndef RBD_CACHE_POLICY_HPP
194 +#define RBD_CACHE_POLICY_HPP
195 +#include <string>  // std::string is used by the Policy interface below
196 +enum CACHESTATUS {  // state of a cache object as tracked by a Policy
197 +  NONE = 0,   // not tracked by the policy
198 +  PROMOTING,  // promotion into the cache is in progress
199 +  PROMOTED,   // promotion has finished
200 +  EVICTING,   // selected for eviction
201 +  EVICTED,    // eviction has finished
202 +};
203 +
204 +// Interface of a cache policy; every call identifies the object by name.
205 +class Policy {
206 +public:
207 +  Policy(){}
208 +  virtual ~Policy(){}
209 +  virtual CACHESTATUS lookup_object(std::string) = 0;       // status of the named object
210 +  virtual int evict_object(std::string&) = 0;               // out-parameter receives the object to evict
211 +  virtual void update_status(std::string, CACHESTATUS) = 0; // records a status change of the named object
212 +  virtual CACHESTATUS get_status(std::string) = 0;          // status of the named object
213 +};
214 +#endif
215 diff --git a/src/tools/rbd_cache/SimplePolicy.hpp b/src/tools/rbd_cache/SimplePolicy.hpp
216 new file mode 100644
217 index 0000000..a0d8de7
218 --- /dev/null
219 +++ b/src/tools/rbd_cache/SimplePolicy.hpp
220 @@ -0,0 +1,180 @@
221 +#ifndef RBD_CACHE_SIMPLE_POLICY_HPP
222 +#define RBD_CACHE_SIMPLE_POLICY_HPP
223 +
224 +#include "Policy.hpp"
225 +#include "include/lru.h"
226 +#include "common/Mutex.h"
227 +
228 +#include <vector>
229 +#include <unordered_map>
230 +#include <string>
231 +
232 +class SimplePolicy : public Policy {
233 +public:
234 +  // block_num is the number of cache entries; level is the fill ratio at or
235 +  // above which evict_object() starts to hand out victims.
236 +  SimplePolicy(uint64_t block_num, float level)
237 +    : m_entries(block_num),
238 +      m_lock("SimplePolicy"),
239 +      m_level(level),
240 +      m_entry_count(block_num)
241 +  {
242 +    // The entries are owned by m_entries for the lifetime of the policy, so
243 +    // the pointers kept in the LRU lists and in m_oid_to_entry stay valid.
244 +    for(auto &entry : m_entries) {
245 +      m_free_lru.lru_insert_bot(&entry);
246 +    }
247 +  }
248 +
249 +  ~SimplePolicy() {}
250 +
251 +  // Returns the status of cache_file_name (NONE when it is not tracked) and
252 +  // moves a tracked entry to the top of the LRU list it currently sits on.
253 +  CACHESTATUS lookup_object(std::string cache_file_name) override {
254 +    Mutex::Locker locker(m_lock);
255 +
256 +    auto entry_it = m_oid_to_entry.find(cache_file_name);
257 +    if(entry_it == m_oid_to_entry.end()) {
258 +      return NONE;
259 +    }
260 +
261 +    Entry* entry = entry_it->second;
262 +    LRU* lru = (entry->status == PROMOTED) ? &m_promoted_lru : &m_handing_lru;
263 +
264 +    // touch it
265 +    lru->lru_remove(entry);
266 +    lru->lru_insert_top(entry);
267 +
268 +    return entry->status;
269 +  }
270 +
271 +  // Picks the next promoted entry to evict. Returns 1 and stores its name in
272 +  // out_cache_file_name (the entry becomes EVICTING), or returns 0 when
273 +  // nothing has to be, or can be, evicted right now.
274 +  int evict_object(std::string& out_cache_file_name) override {
275 +    Mutex::Locker locker(m_lock);
276 +
277 +    // still below the eviction level, don't need to evict. The ratio is
278 +    // computed as float; an integer division would truncate it to 0 or 1.
279 +    uint64_t used_entries = m_oid_to_entry.size();
280 +    float fill_level = (m_entry_count == 0) ? 0.0f
281 +      : static_cast<float>(used_entries) / m_entry_count;
282 +    if(fill_level < m_level) {
283 +      return 0;
284 +    }
285 +
286 +    // nothing is PROMOTED (all entries are PROMOTING or EVICTING): retry later.
287 +    if(m_promoted_lru.lru_get_size() == 0) {
288 +      return 0;
289 +    }
290 +
291 +    // evict one item from promoted lru
292 +    Entry *entry = static_cast<Entry*>(m_promoted_lru.lru_get_next_expire());
293 +    assert(entry != nullptr);
294 +    assert(entry->status == PROMOTED);
295 +
296 +    out_cache_file_name = entry->cache_file_name;
297 +    entry->status = EVICTING;
298 +
299 +    m_promoted_lru.lru_remove(entry);
300 +    m_handing_lru.lru_insert_top(entry);
301 +
302 +    return 1;
303 +  }
304 +
305 +  // Moves _file_name to _status: untracked -> PROMOTING, PROMOTING ->
306 +  // PROMOTED, PROMOTED -> EVICTING, or EVICTED (expected after EVICTING;
307 +  // the entry is freed). Any other transition asserts.
308 +  void update_status(std::string _file_name, CACHESTATUS _status) override {
309 +    Mutex::Locker locker(m_lock);
310 +
311 +    Entry* entry;
312 +    auto entry_it = m_oid_to_entry.find(_file_name);
313 +
314 +    // a promotion may only start for an object that is not tracked yet.
315 +    if(_status == PROMOTING) {
316 +      assert(entry_it == m_oid_to_entry.end());
317 +    }
318 +
319 +    // miss this object.
320 +    if(entry_it == m_oid_to_entry.end() && _status == PROMOTING) {
321 +      entry = static_cast<Entry*>(m_free_lru.lru_get_next_expire());
322 +      // no free entry left, namely the evict thread has some problems.
323 +      assert(entry != nullptr);
324 +
325 +      entry->status = PROMOTING;
326 +      // remember the name: evict_object() reports it for the victim.
327 +      entry->cache_file_name = _file_name;
328 +
329 +      m_oid_to_entry[_file_name] = entry;
330 +      m_free_lru.lru_remove(entry);
331 +      m_handing_lru.lru_insert_top(entry);
332 +
333 +      return;
334 +    }
335 +
336 +    assert(entry_it != m_oid_to_entry.end());
337 +    entry = entry_it->second;
338 +
339 +    // promoting action has been finished, so update it.
340 +    if(entry->status == PROMOTING && _status == PROMOTED) {
341 +      m_handing_lru.lru_remove(entry);
342 +      m_promoted_lru.lru_insert_top(entry);
343 +      entry->status = PROMOTED;
344 +      return;
345 +    }
346 +
347 +    // will delete this cache file
348 +    if(entry->status == PROMOTED && _status == EVICTING) {
349 +      m_promoted_lru.lru_remove(entry);
350 +      m_handing_lru.lru_insert_top(entry);
351 +      entry->status = EVICTING;
352 +      return;
353 +    }
354 +
355 +    // the cache file is gone: forget the object and recycle its entry.
356 +    if(_status == EVICTED) {
357 +      m_oid_to_entry.erase(entry_it);
358 +      m_handing_lru.lru_remove(entry);
359 +      m_free_lru.lru_insert_bot(entry);
360 +      return;
361 +    }
362 +
363 +    assert(0);
364 +  }
365 +
366 +  // get entry status (NONE when the object is not tracked)
367 +  CACHESTATUS get_status(std::string _file_name) override {
368 +    Mutex::Locker locker(m_lock);
369 +    auto entry_it = m_oid_to_entry.find(_file_name);
370 +    if(entry_it == m_oid_to_entry.end()) {
371 +      return NONE;
372 +    }
373 +
374 +    return entry_it->second->status;
375 +  }
376 +
377 +private:
378 +  class Entry : public LRUObject {
379 +    public:
380 +      CACHESTATUS status;
381 +      Entry() : status(NONE){}
382 +      std::string cache_file_name;
383 +      void encode(bufferlist &bl){}
384 +      void decode(bufferlist::iterator &it){}
385 +  };
386 +
387 +  std::unordered_map<std::string, Entry*> m_oid_to_entry;
388 +
389 +  LRU m_free_lru;
390 +  LRU m_handing_lru; // include promoting status or evicting status
391 +  LRU m_promoted_lru; // include promoted, using status.
392 +  // Backing storage of all entries. Declared after the LRU lists so that it
393 +  // is destroyed first (presumably ~LRUObject touches its list - confirm).
394 +  std::vector<Entry> m_entries;
395 +  mutable Mutex m_lock;
396 +  float m_level;
397 +  uint64_t m_entry_count;
398 +};
399 +
400 +#endif
401 -- 
402 2.7.4
403