+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-/*
- * Ceph - scalable distributed file system
- *
- * Copyright (C) 2014 Red Hat
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License version 2.1, as published by the Free Software
- * Foundation. See file COPYING.
- *
- */
-
-#ifndef CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
-#define CEPH_OSD_BLUESTORE_BLUESTORE_TYPES_H
-
-#include <ostream>
-#include <bitset>
-#include "include/types.h"
-#include "include/interval_set.h"
-#include "include/utime.h"
-#include "common/hobject.h"
-#include "compressor/Compressor.h"
-#include "common/Checksummer.h"
-#include "include/mempool.h"
-
-namespace ceph {
- class Formatter;
-}
-
-/// label for block device
-struct bluestore_bdev_label_t {
- uuid_d osd_uuid; ///< osd uuid
- uint64_t size; ///< device size
- utime_t btime; ///< birth time
- string description; ///< device description
-
- map<string,string> meta; ///< {read,write}_meta() content from ObjectStore
-
- void encode(bufferlist& bl) const;
- void decode(bufferlist::iterator& p);
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_bdev_label_t*>& o);
-};
-WRITE_CLASS_ENCODER(bluestore_bdev_label_t)
-
-ostream& operator<<(ostream& out, const bluestore_bdev_label_t& l);
-
-/// collection metadata
-struct bluestore_cnode_t {
- uint32_t bits; ///< how many bits of coll pgid are significant
-
- explicit bluestore_cnode_t(int b=0) : bits(b) {}
-
- DENC(bluestore_cnode_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.bits, p);
- DENC_FINISH(p);
- }
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_cnode_t*>& o);
-};
-WRITE_CLASS_DENC(bluestore_cnode_t)
-
-class AllocExtent;
-typedef mempool::bluestore_alloc::vector<AllocExtent> AllocExtentVector;
-class AllocExtent {
-public:
- uint64_t offset;
- uint32_t length;
-
- AllocExtent() {
- offset = 0;
- length = 0;
- }
-
- AllocExtent(int64_t off, int32_t len) : offset(off), length(len) { }
- uint64_t end() const {
- return offset + length;
- }
- bool operator==(const AllocExtent& other) const {
- return offset == other.offset && length == other.length;
- }
-};
-
-inline static ostream& operator<<(ostream& out, const AllocExtent& e) {
- return out << "0x" << std::hex << e.offset << "~" << e.length << std::dec;
-}
-
-class ExtentList {
- AllocExtentVector *m_extents;
- int64_t m_block_size;
- int64_t m_max_blocks;
-
-public:
- void init(AllocExtentVector *extents, int64_t block_size,
- uint64_t max_alloc_size) {
- m_extents = extents;
- m_block_size = block_size;
- m_max_blocks = max_alloc_size / block_size;
- assert(m_extents->empty());
- }
-
- ExtentList(AllocExtentVector *extents, int64_t block_size) {
- init(extents, block_size, 0);
- }
-
- ExtentList(AllocExtentVector *extents, int64_t block_size,
- uint64_t max_alloc_size) {
- init(extents, block_size, max_alloc_size);
- }
-
- void reset() {
- m_extents->clear();
- }
-
- void add_extents(int64_t start, int64_t count);
-
- AllocExtentVector *get_extents() {
- return m_extents;
- }
-
- std::pair<int64_t, int64_t> get_nth_extent(int index) {
- return std::make_pair
- ((*m_extents)[index].offset / m_block_size,
- (*m_extents)[index].length / m_block_size);
- }
-
- int64_t get_extent_count() {
- return m_extents->size();
- }
-};
-
-
-/// pextent: physical extent
-struct bluestore_pextent_t : public AllocExtent {
- const static uint64_t INVALID_OFFSET = ~0ull;
-
- bluestore_pextent_t() : AllocExtent() {}
- bluestore_pextent_t(uint64_t o, uint64_t l) : AllocExtent(o, l) {}
- bluestore_pextent_t(const AllocExtent &ext) :
- AllocExtent(ext.offset, ext.length) { }
-
- bluestore_pextent_t& operator=(const AllocExtent &ext) {
- offset = ext.offset;
- length = ext.length;
- return *this;
- }
- bool is_valid() const {
- return offset != INVALID_OFFSET;
- }
-
- DENC(bluestore_pextent_t, v, p) {
- denc_lba(v.offset, p);
- denc_varint_lowz(v.length, p);
- }
-
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_pextent_t*>& ls);
-};
-WRITE_CLASS_DENC(bluestore_pextent_t)
-
-ostream& operator<<(ostream& out, const bluestore_pextent_t& o);
-
-typedef mempool::bluestore_cache_other::vector<bluestore_pextent_t> PExtentVector;
-
-template<>
-struct denc_traits<PExtentVector> {
- static constexpr bool supported = true;
- static constexpr bool bounded = false;
- static constexpr bool featured = false;
- static constexpr bool need_contiguous = true;
- static void bound_encode(const PExtentVector& v, size_t& p) {
- p += sizeof(uint32_t);
- const auto size = v.size();
- if (size) {
- size_t per = 0;
- denc(v.front(), per);
- p += per * size;
- }
- }
- static void encode(const PExtentVector& v,
- bufferlist::contiguous_appender& p) {
- denc_varint(v.size(), p);
- for (auto& i : v) {
- denc(i, p);
- }
- }
- static void decode(PExtentVector& v, bufferptr::iterator& p) {
- unsigned num;
- denc_varint(num, p);
- v.clear();
- v.resize(num);
- for (unsigned i=0; i<num; ++i) {
- denc(v[i], p);
- }
- }
-};
-
-
-/// extent_map: a map of reference counted extents
-struct bluestore_extent_ref_map_t {
- struct record_t {
- uint32_t length;
- uint32_t refs;
- record_t(uint32_t l=0, uint32_t r=0) : length(l), refs(r) {}
- DENC(bluestore_extent_ref_map_t::record_t, v, p) {
- denc_varint_lowz(v.length, p);
- denc_varint(v.refs, p);
- }
- };
-
- typedef mempool::bluestore_cache_other::map<uint64_t,record_t> map_t;
- map_t ref_map;
-
- void _check() const;
- void _maybe_merge_left(map_t::iterator& p);
-
- void clear() {
- ref_map.clear();
- }
- bool empty() const {
- return ref_map.empty();
- }
-
- void get(uint64_t offset, uint32_t len);
- void put(uint64_t offset, uint32_t len, PExtentVector *release,
- bool *maybe_unshared);
-
- bool contains(uint64_t offset, uint32_t len) const;
- bool intersects(uint64_t offset, uint32_t len) const;
-
- void bound_encode(size_t& p) const {
- denc_varint((uint32_t)0, p);
- if (!ref_map.empty()) {
- size_t elem_size = 0;
- denc_varint_lowz((uint64_t)0, elem_size);
- ref_map.begin()->second.bound_encode(elem_size);
- p += elem_size * ref_map.size();
- }
- }
- void encode(bufferlist::contiguous_appender& p) const {
- uint32_t n = ref_map.size();
- denc_varint(n, p);
- if (n) {
- auto i = ref_map.begin();
- denc_varint_lowz(i->first, p);
- i->second.encode(p);
- int64_t pos = i->first;
- while (--n) {
- ++i;
- denc_varint_lowz((int64_t)i->first - pos, p);
- i->second.encode(p);
- pos = i->first;
- }
- }
- }
- void decode(bufferptr::iterator& p) {
- uint32_t n;
- denc_varint(n, p);
- if (n) {
- int64_t pos;
- denc_varint_lowz(pos, p);
- ref_map[pos].decode(p);
- while (--n) {
- int64_t delta;
- denc_varint_lowz(delta, p);
- pos += delta;
- ref_map[pos].decode(p);
- }
- }
- }
-
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_extent_ref_map_t*>& o);
-};
-WRITE_CLASS_DENC(bluestore_extent_ref_map_t)
-
-
-ostream& operator<<(ostream& out, const bluestore_extent_ref_map_t& rm);
-static inline bool operator==(const bluestore_extent_ref_map_t::record_t& l,
- const bluestore_extent_ref_map_t::record_t& r) {
- return l.length == r.length && l.refs == r.refs;
-}
-static inline bool operator==(const bluestore_extent_ref_map_t& l,
- const bluestore_extent_ref_map_t& r) {
- return l.ref_map == r.ref_map;
-}
-static inline bool operator!=(const bluestore_extent_ref_map_t& l,
- const bluestore_extent_ref_map_t& r) {
- return !(l == r);
-}
-
-/// blob_use_tracker: a set of per-alloc unit ref counters to track blob usage
-struct bluestore_blob_use_tracker_t {
- // N.B.: There is no need to minimize au_size/num_au
- // as much as possible (e.g. have just a single byte for au_size) since:
- // 1) Struct isn't packed hence it's padded. And even if it's packed see 2)
- // 2) Mem manager has its own granularity, most probably >= 8 bytes
- //
- uint32_t au_size; // Allocation (=tracking) unit size,
- // == 0 if uninitialized
- uint32_t num_au; // Amount of allocation units tracked
- // == 0 if single unit or the whole blob is tracked
-
- union {
- uint32_t* bytes_per_au;
- uint32_t total_bytes;
- };
-
- bluestore_blob_use_tracker_t()
- : au_size(0), num_au(0), bytes_per_au(nullptr) {
- }
- ~bluestore_blob_use_tracker_t() {
- clear();
- }
-
- void clear() {
- if (num_au != 0) {
- delete[] bytes_per_au;
- }
- bytes_per_au = 0;
- au_size = 0;
- num_au = 0;
- }
-
- uint32_t get_referenced_bytes() const {
- uint32_t total = 0;
- if (!num_au) {
- total = total_bytes;
- } else {
- for (size_t i = 0; i < num_au; ++i) {
- total += bytes_per_au[i];
- }
- }
- return total;
- }
- bool is_not_empty() const {
- if (!num_au) {
- return total_bytes != 0;
- } else {
- for (size_t i = 0; i < num_au; ++i) {
- if (bytes_per_au[i]) {
- return true;
- }
- }
- }
- return false;
- }
- bool is_empty() const {
- return !is_not_empty();
- }
- void prune_tail(uint32_t new_len) {
- if (num_au) {
- new_len = ROUND_UP_TO(new_len, au_size);
- uint32_t _num_au = new_len / au_size;
- assert(_num_au <= num_au);
- if (_num_au) {
- num_au = _num_au; // bytes_per_au array is left unmodified
-
- } else {
- clear();
- }
- }
- }
- void add_tail(uint32_t new_len, uint32_t _au_size) {
- auto full_size = au_size * (num_au ? num_au : 1);
- assert(new_len >= full_size);
- if (new_len == full_size) {
- return;
- }
- if (!num_au) {
- uint32_t old_total = total_bytes;
- total_bytes = 0;
- init(new_len, _au_size);
- assert(num_au);
- bytes_per_au[0] = old_total;
- } else {
- assert(_au_size == au_size);
- new_len = ROUND_UP_TO(new_len, au_size);
- uint32_t _num_au = new_len / au_size;
- assert(_num_au >= num_au);
- if (_num_au > num_au) {
- auto old_bytes = bytes_per_au;
- auto old_num_au = num_au;
- num_au = _num_au;
- allocate();
- for (size_t i = 0; i < old_num_au; i++) {
- bytes_per_au[i] = old_bytes[i];
- }
- for (size_t i = old_num_au; i < num_au; i++) {
- bytes_per_au[i] = 0;
- }
- delete[] old_bytes;
- }
- }
- }
-
- void init(
- uint32_t full_length,
- uint32_t _au_size);
-
- void get(
- uint32_t offset,
- uint32_t len);
-
- /// put: return true if the blob has no references any more after the call,
- /// no release_units is filled for the sake of performance.
- /// return false if there are some references to the blob,
- /// in this case release_units contains pextents
- /// (identified by their offsets relative to the blob start)
- /// that are not used any more and can be safely deallocated.
- bool put(
- uint32_t offset,
- uint32_t len,
- PExtentVector *release);
-
- bool can_split() const;
- bool can_split_at(uint32_t blob_offset) const;
- void split(
- uint32_t blob_offset,
- bluestore_blob_use_tracker_t* r);
-
- bool equal(
- const bluestore_blob_use_tracker_t& other) const;
-
- void bound_encode(size_t& p) const {
- denc_varint(au_size, p);
- if (au_size) {
- denc_varint(num_au, p);
- if (!num_au) {
- denc_varint(total_bytes, p);
- } else {
- size_t elem_size = 0;
- denc_varint((uint32_t)0, elem_size);
- p += elem_size * num_au;
- }
- }
- }
- void encode(bufferlist::contiguous_appender& p) const {
- denc_varint(au_size, p);
- if (au_size) {
- denc_varint(num_au, p);
- if (!num_au) {
- denc_varint(total_bytes, p);
- } else {
- size_t elem_size = 0;
- denc_varint((uint32_t)0, elem_size);
- for (size_t i = 0; i < num_au; ++i) {
- denc_varint(bytes_per_au[i], p);
- }
- }
- }
- }
- void decode(bufferptr::iterator& p) {
- clear();
- denc_varint(au_size, p);
- if (au_size) {
- denc_varint(num_au, p);
- if (!num_au) {
- denc_varint(total_bytes, p);
- } else {
- allocate();
- for (size_t i = 0; i < num_au; ++i) {
- denc_varint(bytes_per_au[i], p);
- }
- }
- }
- }
-
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_blob_use_tracker_t*>& o);
-private:
- void allocate();
-};
-WRITE_CLASS_DENC(bluestore_blob_use_tracker_t)
-ostream& operator<<(ostream& out, const bluestore_blob_use_tracker_t& rm);
-
-/// blob: a piece of data on disk
-struct bluestore_blob_t {
-private:
- PExtentVector extents; ///< raw data position on device
- uint32_t logical_length = 0; ///< original length of data stored in the blob
- uint32_t compressed_length = 0; ///< compressed length if any
-
-public:
- enum {
- LEGACY_FLAG_MUTABLE = 1, ///< [legacy] blob can be overwritten or split
- FLAG_COMPRESSED = 2, ///< blob is compressed
- FLAG_CSUM = 4, ///< blob has checksums
- FLAG_HAS_UNUSED = 8, ///< blob has unused map
- FLAG_SHARED = 16, ///< blob is shared; see external SharedBlob
- };
- static string get_flags_string(unsigned flags);
-
- uint32_t flags = 0; ///< FLAG_*
-
- typedef uint16_t unused_t;
- unused_t unused = 0; ///< portion that has never been written to (bitmap)
-
- uint8_t csum_type = Checksummer::CSUM_NONE; ///< CSUM_*
- uint8_t csum_chunk_order = 0; ///< csum block size is 1<<block_order bytes
-
- bufferptr csum_data; ///< opaque vector of csum data
-
- bluestore_blob_t(uint32_t f = 0) : flags(f) {}
-
- const PExtentVector& get_extents() const {
- return extents;
- }
-
- DENC_HELPERS;
- void bound_encode(size_t& p, uint64_t struct_v) const {
- assert(struct_v == 1 || struct_v == 2);
- denc(extents, p);
- denc_varint(flags, p);
- denc_varint_lowz(logical_length, p);
- denc_varint_lowz(compressed_length, p);
- denc(csum_type, p);
- denc(csum_chunk_order, p);
- denc_varint(csum_data.length(), p);
- p += csum_data.length();
- p += sizeof(unused_t);
- }
-
- void encode(bufferlist::contiguous_appender& p, uint64_t struct_v) const {
- assert(struct_v == 1 || struct_v == 2);
- denc(extents, p);
- denc_varint(flags, p);
- if (is_compressed()) {
- denc_varint_lowz(logical_length, p);
- denc_varint_lowz(compressed_length, p);
- }
- if (has_csum()) {
- denc(csum_type, p);
- denc(csum_chunk_order, p);
- denc_varint(csum_data.length(), p);
- memcpy(p.get_pos_add(csum_data.length()), csum_data.c_str(),
- csum_data.length());
- }
- if (has_unused()) {
- denc(unused, p);
- }
- }
-
- void decode(bufferptr::iterator& p, uint64_t struct_v) {
- assert(struct_v == 1 || struct_v == 2);
- denc(extents, p);
- denc_varint(flags, p);
- if (is_compressed()) {
- denc_varint_lowz(logical_length, p);
- denc_varint_lowz(compressed_length, p);
- } else {
- logical_length = get_ondisk_length();
- }
- if (has_csum()) {
- denc(csum_type, p);
- denc(csum_chunk_order, p);
- int len;
- denc_varint(len, p);
- csum_data = p.get_ptr(len);
- csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
- }
- if (has_unused()) {
- denc(unused, p);
- }
- }
-
- bool can_split() const {
- return
- !has_flag(FLAG_SHARED) &&
- !has_flag(FLAG_COMPRESSED) &&
- !has_flag(FLAG_HAS_UNUSED); // splitting unused set is complex
- }
- bool can_split_at(uint32_t blob_offset) const {
- return !has_csum() || blob_offset % get_csum_chunk_size() == 0;
- }
-
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_blob_t*>& ls);
-
- bool has_flag(unsigned f) const {
- return flags & f;
- }
- void set_flag(unsigned f) {
- flags |= f;
- }
- void clear_flag(unsigned f) {
- flags &= ~f;
- }
- string get_flags_string() const {
- return get_flags_string(flags);
- }
-
- void set_compressed(uint64_t clen_orig, uint64_t clen) {
- set_flag(FLAG_COMPRESSED);
- logical_length = clen_orig;
- compressed_length = clen;
- }
- bool is_mutable() const {
- return !is_compressed() && !is_shared();
- }
- bool is_compressed() const {
- return has_flag(FLAG_COMPRESSED);
- }
- bool has_csum() const {
- return has_flag(FLAG_CSUM);
- }
- bool has_unused() const {
- return has_flag(FLAG_HAS_UNUSED);
- }
- bool is_shared() const {
- return has_flag(FLAG_SHARED);
- }
-
- /// return chunk (i.e. min readable block) size for the blob
- uint64_t get_chunk_size(uint64_t dev_block_size) const {
- return has_csum() ?
- MAX(dev_block_size, get_csum_chunk_size()) : dev_block_size;
- }
- uint32_t get_csum_chunk_size() const {
- return 1 << csum_chunk_order;
- }
- uint32_t get_compressed_payload_length() const {
- return is_compressed() ? compressed_length : 0;
- }
- uint64_t calc_offset(uint64_t x_off, uint64_t *plen) const {
- auto p = extents.begin();
- assert(p != extents.end());
- while (x_off >= p->length) {
- x_off -= p->length;
- ++p;
- assert(p != extents.end());
- }
- if (plen)
- *plen = p->length - x_off;
- return p->offset + x_off;
- }
-
- // validate whether or not the status of pextents within the given range
- // meets the requirement(allocated or unallocated).
- bool _validate_range(uint64_t b_off, uint64_t b_len,
- bool require_allocated) const {
- auto p = extents.begin();
- assert(p != extents.end());
- while (b_off >= p->length) {
- b_off -= p->length;
- ++p;
- assert(p != extents.end());
- }
- b_len += b_off;
- while (b_len) {
- assert(p != extents.end());
- if (require_allocated != p->is_valid()) {
- return false;
- }
-
- if (p->length >= b_len) {
- return true;
- }
- b_len -= p->length;
- ++p;
- }
- assert(0 == "we should not get here");
- }
-
- /// return true if the entire range is allocated
- /// (mapped to extents on disk)
- bool is_allocated(uint64_t b_off, uint64_t b_len) const {
- return _validate_range(b_off, b_len, true);
- }
-
- /// return true if the entire range is unallocated
- /// (not mapped to extents on disk)
- bool is_unallocated(uint64_t b_off, uint64_t b_len) const {
- return _validate_range(b_off, b_len, false);
- }
-
- /// return true if the logical range has never been used
- bool is_unused(uint64_t offset, uint64_t length) const {
- if (!has_unused()) {
- return false;
- }
- uint64_t blob_len = get_logical_length();
- assert((blob_len % (sizeof(unused)*8)) == 0);
- assert(offset + length <= blob_len);
- uint64_t chunk_size = blob_len / (sizeof(unused)*8);
- uint64_t start = offset / chunk_size;
- uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
- auto i = start;
- while (i < end && (unused & (1u << i))) {
- i++;
- }
- return i >= end;
- }
-
- /// mark a range that has never been used
- void add_unused(uint64_t offset, uint64_t length) {
- uint64_t blob_len = get_logical_length();
- assert((blob_len % (sizeof(unused)*8)) == 0);
- assert(offset + length <= blob_len);
- uint64_t chunk_size = blob_len / (sizeof(unused)*8);
- uint64_t start = ROUND_UP_TO(offset, chunk_size) / chunk_size;
- uint64_t end = (offset + length) / chunk_size;
- for (auto i = start; i < end; ++i) {
- unused |= (1u << i);
- }
- if (start != end) {
- set_flag(FLAG_HAS_UNUSED);
- }
- }
-
- /// indicate that a range has (now) been used.
- void mark_used(uint64_t offset, uint64_t length) {
- if (has_unused()) {
- uint64_t blob_len = get_logical_length();
- assert((blob_len % (sizeof(unused)*8)) == 0);
- assert(offset + length <= blob_len);
- uint64_t chunk_size = blob_len / (sizeof(unused)*8);
- uint64_t start = offset / chunk_size;
- uint64_t end = ROUND_UP_TO(offset + length, chunk_size) / chunk_size;
- for (auto i = start; i < end; ++i) {
- unused &= ~(1u << i);
- }
- if (unused == 0) {
- clear_flag(FLAG_HAS_UNUSED);
- }
- }
- }
-
- int map(uint64_t x_off, uint64_t x_len,
- std::function<int(uint64_t,uint64_t)> f) const {
- auto p = extents.begin();
- assert(p != extents.end());
- while (x_off >= p->length) {
- x_off -= p->length;
- ++p;
- assert(p != extents.end());
- }
- while (x_len > 0) {
- assert(p != extents.end());
- uint64_t l = MIN(p->length - x_off, x_len);
- int r = f(p->offset + x_off, l);
- if (r < 0)
- return r;
- x_off = 0;
- x_len -= l;
- ++p;
- }
- return 0;
- }
- void map_bl(uint64_t x_off,
- bufferlist& bl,
- std::function<void(uint64_t,bufferlist&)> f) const {
- auto p = extents.begin();
- assert(p != extents.end());
- while (x_off >= p->length) {
- x_off -= p->length;
- ++p;
- assert(p != extents.end());
- }
- bufferlist::iterator it = bl.begin();
- uint64_t x_len = bl.length();
- while (x_len > 0) {
- assert(p != extents.end());
- uint64_t l = MIN(p->length - x_off, x_len);
- bufferlist t;
- it.copy(l, t);
- f(p->offset + x_off, t);
- x_off = 0;
- x_len -= l;
- ++p;
- }
- }
-
- uint32_t get_ondisk_length() const {
- uint32_t len = 0;
- for (auto &p : extents) {
- len += p.length;
- }
- return len;
- }
-
- uint32_t get_logical_length() const {
- return logical_length;
- }
- size_t get_csum_value_size() const;
-
- size_t get_csum_count() const {
- size_t vs = get_csum_value_size();
- if (!vs)
- return 0;
- return csum_data.length() / vs;
- }
- uint64_t get_csum_item(unsigned i) const {
- size_t cs = get_csum_value_size();
- const char *p = csum_data.c_str();
- switch (cs) {
- case 0:
- assert(0 == "no csum data, bad index");
- case 1:
- return reinterpret_cast<const uint8_t*>(p)[i];
- case 2:
- return reinterpret_cast<const __le16*>(p)[i];
- case 4:
- return reinterpret_cast<const __le32*>(p)[i];
- case 8:
- return reinterpret_cast<const __le64*>(p)[i];
- default:
- assert(0 == "unrecognized csum word size");
- }
- }
- const char *get_csum_item_ptr(unsigned i) const {
- size_t cs = get_csum_value_size();
- return csum_data.c_str() + (cs * i);
- }
- char *get_csum_item_ptr(unsigned i) {
- size_t cs = get_csum_value_size();
- return csum_data.c_str() + (cs * i);
- }
-
- void init_csum(unsigned type, unsigned order, unsigned len) {
- flags |= FLAG_CSUM;
- csum_type = type;
- csum_chunk_order = order;
- csum_data = buffer::create(get_csum_value_size() * len / get_csum_chunk_size());
- csum_data.zero();
- csum_data.reassign_to_mempool(mempool::mempool_bluestore_cache_other);
- }
-
- /// calculate csum for the buffer at the given b_off
- void calc_csum(uint64_t b_off, const bufferlist& bl);
-
- /// verify csum: return -EOPNOTSUPP for unsupported checksum type;
- /// return -1 and valid(nonnegative) b_bad_off for checksum error;
- /// return 0 if all is well.
- int verify_csum(uint64_t b_off, const bufferlist& bl, int* b_bad_off,
- uint64_t *bad_csum) const;
-
- bool can_prune_tail() const {
- return
- extents.size() > 1 && // if it's all invalid it's not pruning.
- !extents.back().is_valid() &&
- !has_unused();
- }
- void prune_tail() {
- const auto &p = extents.back();
- logical_length -= p.length;
- extents.pop_back();
- if (has_csum()) {
- bufferptr t;
- t.swap(csum_data);
- csum_data = bufferptr(t.c_str(),
- get_logical_length() / get_csum_chunk_size() *
- get_csum_value_size());
- }
- }
- void add_tail(uint32_t new_len) {
- assert(is_mutable());
- assert(!has_unused());
- assert(new_len > logical_length);
- extents.emplace_back(
- bluestore_pextent_t(
- bluestore_pextent_t::INVALID_OFFSET,
- new_len - logical_length));
- logical_length = new_len;
- if (has_csum()) {
- bufferptr t;
- t.swap(csum_data);
- csum_data = buffer::create(
- get_csum_value_size() * logical_length / get_csum_chunk_size());
- csum_data.copy_in(0, t.length(), t.c_str());
- csum_data.zero(t.length(), csum_data.length() - t.length());
- }
- }
- uint32_t get_release_size(uint32_t min_alloc_size) const {
- if (is_compressed()) {
- return get_logical_length();
- }
- uint32_t res = get_csum_chunk_size();
- if (!has_csum() || res < min_alloc_size) {
- res = min_alloc_size;
- }
- return res;
- }
-
- void split(uint32_t blob_offset, bluestore_blob_t& rb);
- void allocated(uint32_t b_off, uint32_t length, const AllocExtentVector& allocs);
- void allocated_test(const bluestore_pextent_t& alloc); // intended for UT only
-
- /// updates blob's pextents container and return unused pextents eligible
- /// for release.
- /// all - indicates that the whole blob to be released.
- /// logical - specifies set of logical extents within blob's
- /// to be released
- /// Returns true if blob has no more valid pextents
- bool release_extents(
- bool all,
- const PExtentVector& logical,
- PExtentVector* r);
-};
-WRITE_CLASS_DENC_FEATURED(bluestore_blob_t)
-
-ostream& operator<<(ostream& out, const bluestore_blob_t& o);
-
-
-/// shared blob state
-struct bluestore_shared_blob_t {
- uint64_t sbid; ///> shared blob id
- bluestore_extent_ref_map_t ref_map; ///< shared blob extents
-
- bluestore_shared_blob_t(uint64_t _sbid) : sbid(_sbid) {}
-
- DENC(bluestore_shared_blob_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.ref_map, p);
- DENC_FINISH(p);
- }
-
-
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_shared_blob_t*>& ls);
-
- bool empty() const {
- return ref_map.empty();
- }
-};
-WRITE_CLASS_DENC(bluestore_shared_blob_t)
-
-ostream& operator<<(ostream& out, const bluestore_shared_blob_t& o);
-
-/// onode: per-object metadata
-struct bluestore_onode_t {
- uint64_t nid = 0; ///< numeric id (locally unique)
- uint64_t size = 0; ///< object size
- map<mempool::bluestore_cache_other::string, bufferptr> attrs; ///< attrs
-
- struct shard_info {
- uint32_t offset = 0; ///< logical offset for start of shard
- uint32_t bytes = 0; ///< encoded bytes
- DENC(shard_info, v, p) {
- denc_varint(v.offset, p);
- denc_varint(v.bytes, p);
- }
- void dump(Formatter *f) const;
- };
- vector<shard_info> extent_map_shards; ///< extent map shards (if any)
-
- uint32_t expected_object_size = 0;
- uint32_t expected_write_size = 0;
- uint32_t alloc_hint_flags = 0;
-
- uint8_t flags = 0;
-
- enum {
- FLAG_OMAP = 1,
- };
-
- string get_flags_string() const {
- string s;
- if (flags & FLAG_OMAP) {
- s = "omap";
- }
- return s;
- }
-
- bool has_flag(unsigned f) const {
- return flags & f;
- }
-
- void set_flag(unsigned f) {
- flags |= f;
- }
-
- void clear_flag(unsigned f) {
- flags &= ~f;
- }
-
- bool has_omap() const {
- return has_flag(FLAG_OMAP);
- }
-
- void set_omap_flag() {
- set_flag(FLAG_OMAP);
- }
-
- void clear_omap_flag() {
- clear_flag(FLAG_OMAP);
- }
-
- DENC(bluestore_onode_t, v, p) {
- DENC_START(1, 1, p);
- denc_varint(v.nid, p);
- denc_varint(v.size, p);
- denc(v.attrs, p);
- denc(v.flags, p);
- denc(v.extent_map_shards, p);
- denc_varint(v.expected_object_size, p);
- denc_varint(v.expected_write_size, p);
- denc_varint(v.alloc_hint_flags, p);
- DENC_FINISH(p);
- }
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_onode_t*>& o);
-};
-WRITE_CLASS_DENC(bluestore_onode_t::shard_info)
-WRITE_CLASS_DENC(bluestore_onode_t)
-
-ostream& operator<<(ostream& out, const bluestore_onode_t::shard_info& si);
-
-/// writeahead-logged op
-struct bluestore_deferred_op_t {
- typedef enum {
- OP_WRITE = 1,
- } type_t;
- __u8 op = 0;
-
- PExtentVector extents;
- bufferlist data;
-
- DENC(bluestore_deferred_op_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.op, p);
- denc(v.extents, p);
- denc(v.data, p);
- DENC_FINISH(p);
- }
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_deferred_op_t*>& o);
-};
-WRITE_CLASS_DENC(bluestore_deferred_op_t)
-
-
-/// writeahead-logged transaction
-struct bluestore_deferred_transaction_t {
- uint64_t seq = 0;
- list<bluestore_deferred_op_t> ops;
- interval_set<uint64_t> released; ///< allocations to release after tx
-
- bluestore_deferred_transaction_t() : seq(0) {}
-
- DENC(bluestore_deferred_transaction_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.seq, p);
- denc(v.ops, p);
- denc(v.released, p);
- DENC_FINISH(p);
- }
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_deferred_transaction_t*>& o);
-};
-WRITE_CLASS_DENC(bluestore_deferred_transaction_t)
-
-struct bluestore_compression_header_t {
- uint8_t type = Compressor::COMP_ALG_NONE;
- uint32_t length = 0;
-
- bluestore_compression_header_t() {}
- bluestore_compression_header_t(uint8_t _type)
- : type(_type) {}
-
- DENC(bluestore_compression_header_t, v, p) {
- DENC_START(1, 1, p);
- denc(v.type, p);
- denc(v.length, p);
- DENC_FINISH(p);
- }
- void dump(Formatter *f) const;
- static void generate_test_instances(list<bluestore_compression_header_t*>& o);
-};
-WRITE_CLASS_DENC(bluestore_compression_header_t)
-
-
-#endif