initial code repo
[stor4nfv.git] / src / ceph / src / os / bluestore / NVMEDevice.h
diff --git a/src/ceph/src/os/bluestore/NVMEDevice.h b/src/ceph/src/os/bluestore/NVMEDevice.h
new file mode 100644 (file)
index 0000000..40378ea
--- /dev/null
@@ -0,0 +1,243 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 XSky <haomai@xsky.com>
+ *
+ * Author: Haomai Wang <haomaiwang@gmail.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_OS_BLUESTORE_NVMEDEVICE
+#define CEPH_OS_BLUESTORE_NVMEDEVICE
+
+#include <queue>
+#include <map>
+#include <limits>
+
+// _Static_assert is a C11 keyword that C++ does not provide; alias it to the C++ equivalent, static_assert
+#define _Static_assert static_assert
+
+
+#include "include/interval_set.h"
+#include "common/ceph_time.h"
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "BlockDevice.h"
+
+enum class IOCommand {  // I/O command kinds (read / write / flush); not referenced again in this header -- presumably consumed by the implementation file, confirm there
+  READ_COMMAND,
+  WRITE_COMMAND,
+  FLUSH_COMMAND
+};
+
+class Task;              // used below only as the pointee type of NVMEDevice::buffered_task_head
+class PerfCounters;      // forward declaration; not referenced again in this header
+class SharedDriverData;  // used below only as the pointee type of NVMEDevice::driver / get_driver()
+
+class NVMEDevice : public BlockDevice {
+  /**
+   * points to pinned, physically contiguous memory region;
+   * contains 4KB IDENTIFY structure for controller which is
+   *  target for CONTROLLER IDENTIFY command during initialization
+   */
+  SharedDriverData *driver;
+  string name;
+
+  uint64_t size;
+  uint64_t block_size;
+
+  bool aio_stop;
+
+  struct BufferedExtents {
+    struct Extent {
+      uint64_t x_len;
+      uint64_t x_off;
+      const char *data;
+      uint64_t data_len;
+    };
+    using Offset = uint64_t;
+    map<Offset, Extent> buffered_extents;
+    uint64_t left_edge = std::numeric_limits<uint64_t>::max();
+    uint64_t right_edge = 0;
+
+    void verify() {
+      interval_set<uint64_t> m;
+      for (auto && it : buffered_extents) {
+        assert(!m.intersects(it.first, it.second.x_len));
+        m.insert(it.first, it.second.x_len);
+      }
+    }
+
+    void insert(uint64_t off, uint64_t len, const char *data) {
+      auto it = buffered_extents.lower_bound(off);
+      if (it != buffered_extents.begin()) {
+        --it;
+        if (it->first + it->second.x_len <= off)
+          ++it;
+      }
+      uint64_t end = off + len;
+      if (off < left_edge)
+        left_edge = off;
+      if (end > right_edge)
+        right_edge = end;
+      while (it != buffered_extents.end()) {
+        if (it->first >= end)
+          break;
+        uint64_t extent_it_end = it->first + it->second.x_len;
+        assert(extent_it_end >= off);
+        if (it->first <= off) {
+          if (extent_it_end > end) {
+            //         <-     data    ->
+            // <-            it           ->
+            it->second.x_len -= (extent_it_end - off);
+            buffered_extents[end] = Extent{
+                extent_it_end - end, it->second.x_off + it->second.x_len + len, it->second.data, it->second.data_len};
+          } else {
+            //         <-     data    ->
+            // <-     it    ->
+            assert(extent_it_end <= end);
+            it->second.x_len -= (extent_it_end - off);
+          }
+          ++it;
+        } else {
+          assert(it->first > off);
+          if (extent_it_end > end) {
+            //  <-     data    ->
+            //      <-           it          ->
+            uint64_t overlap = end - it->first;
+            buffered_extents[end] = Extent{
+                it->second.x_len - overlap, it->second.x_off + overlap, it->second.data, it->second.data_len};
+          } else {
+            //  <-     data    ->
+            //      <- it ->
+          }
+          buffered_extents.erase(it++);
+        }
+      }
+      buffered_extents[off] = Extent{
+          len, 0, data, len};
+
+      if (0)
+        verify();
+    }
+
+    void memcpy_check(char *dst, uint64_t dst_raw_len, uint64_t dst_off,
+                      map<Offset, Extent>::iterator &it, uint64_t src_off, uint64_t copylen) {
+      if (0) {
+        assert(dst_off + copylen <= dst_raw_len);
+        assert(it->second.x_off + src_off + copylen <= it->second.data_len);
+      }
+      memcpy(dst + dst_off, it->second.data + it->second.x_off + src_off, copylen);
+    }
+
+    uint64_t read_overlap(uint64_t off, uint64_t len, char *buf) {
+      uint64_t end = off + len;
+      if (end <= left_edge || off >= right_edge)
+        return 0;
+
+      uint64_t copied = 0;
+      auto it = buffered_extents.lower_bound(off);
+      if (it != buffered_extents.begin()) {
+        --it;
+        if (it->first + it->second.x_len <= off)
+          ++it;
+      }
+      uint64_t copy_len;
+      while (it != buffered_extents.end()) {
+        if (it->first >= end)
+          break;
+        uint64_t extent_it_end = it->first + it->second.x_len;
+        assert(extent_it_end >= off);
+        if (it->first >= off) {
+          if (extent_it_end > end) {
+            //  <-     data    ->
+            //      <-           it          ->
+            copy_len = len - (it->first - off);
+            memcpy_check(buf, len, it->first - off, it, 0, copy_len);
+          } else {
+            //  <-     data    ->
+            //      <- it ->
+            copy_len = it->second.x_len;
+            memcpy_check(buf, len, it->first - off, it, 0, copy_len);
+          }
+        } else {
+          if (extent_it_end > end) {
+            //         <-     data    ->
+            // <-           it          ->
+            copy_len = len;
+            memcpy_check(buf, len, 0, it, off - it->first, copy_len);
+          } else {
+            //         <-     data    ->
+            // <-     it    ->
+            assert(extent_it_end <= end);
+            copy_len = it->first + it->second.x_len - off;
+            memcpy_check(buf, len, 0, it, off - it->first, copy_len);
+          }
+        }
+        copied += copy_len;
+        ++it;
+      }
+      return copied;
+    }
+
+    void clear() {
+      buffered_extents.clear();
+      left_edge = std::numeric_limits<uint64_t>::max();
+      right_edge = 0;
+    }
+  };
+  Mutex buffer_lock;
+  BufferedExtents buffered_extents;
+  Task *buffered_task_head = nullptr;
+
+  static void init();
+ public:
+  SharedDriverData *get_driver() { return driver; }
+
+ public:
+  aio_callback_t aio_callback;
+  void *aio_callback_priv;
+
+  NVMEDevice(CephContext* cct, aio_callback_t cb, void *cbpriv);
+
+  bool supported_bdev_label() override { return false; }
+
+  void aio_submit(IOContext *ioc) override;
+
+  uint64_t get_size() const override {
+    return size;
+  }
+  uint64_t get_block_size() const override {
+    return block_size;
+  }
+
+  int read(uint64_t off, uint64_t len, bufferlist *pbl,
+           IOContext *ioc,
+           bool buffered) override;
+  int aio_read(
+    uint64_t off,
+    uint64_t len,
+    bufferlist *pbl,
+    IOContext *ioc) override;
+  int aio_write(uint64_t off, bufferlist& bl,
+                IOContext *ioc,
+                bool buffered) override;
+  int write(uint64_t off, bufferlist& bl, bool buffered) override;
+  int flush() override;
+  int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override;
+
+  // for managing buffered readers/writers
+  int invalidate_cache(uint64_t off, uint64_t len) override;
+  int open(const string& path) override;
+  void close() override;
+  int collect_metadata(string prefix, map<string,string> *pm) const override;
+};
+
+#endif