1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 */
17 #if defined(__linux__) || defined(__FreeBSD__)
23 # define _XOPEN_SOURCE 600
29 #if defined(__linux__) // For malloc(2).
38 # include <sys/mman.h>
47 #include <type_traits>
51 #include "buffer_fwd.h"
54 # include "include/assert.h"
59 #include "inline_memory.h"
62 #define CEPH_BUFFER_API __attribute__ ((visibility ("default")))
64 #define CEPH_BUFFER_API
69 class XioDispatchHook;
// All buffer machinery lives in namespace ceph::buffer, exported with
// default symbol visibility via CEPH_BUFFER_API.
// NOTE(review): this excerpt is line-sampled; closing braces and some
// members of the structs below are not visible here.
75 namespace buffer CEPH_BUFFER_API {
// Root of the exception hierarchy thrown by buffer operations; derives
// from std::exception so callers can catch it generically.
80 struct error : public std::exception{
81 const char *what() const throw () override;
// Thrown when allocating a raw buffer fails.
83 struct bad_alloc : public error {
84 const char *what() const throw () override;
// Thrown when a read/seek runs past the end of the buffered data.
86 struct end_of_buffer : public error {
87 const char *what() const throw () override;
// Thrown by decoders when encoded input is invalid; the constructor
// formats a "buffer::malformed_input: <msg>" string into an internal
// fixed-size buffer (presumably a char array member `buf` declared on a
// line not visible in this excerpt — TODO confirm).
89 struct malformed_input : public error {
90 explicit malformed_input(const std::string& w) {
91 snprintf(buf, sizeof(buf), "buffer::malformed_input: %s", w.c_str());
93 const char *what() const throw () override;
// malformed_input variant constructed from a numeric error code.
97 struct error_code : public malformed_input {
98 explicit error_code(int error);
// Global buffer accounting and debugging hooks.  These free functions
// expose counters for raw-buffer allocation, cached-CRC hits/misses and
// c_str() accesses; each counter family has a track_*(bool) switch to
// enable or disable collection at runtime.
103 /// total bytes allocated
104 int get_total_alloc();
106 /// history total bytes allocated
107 uint64_t get_history_alloc_bytes();
109 /// total num allocated
110 uint64_t get_history_alloc_num();
112 /// enable/disable alloc tracking
113 void track_alloc(bool b);
115 /// count of cached crc hits (matching input)
116 int get_cached_crc();
117 /// count of cached crc hits (mismatching input, required adjustment)
118 int get_cached_crc_adjusted();
119 /// count of crc cache misses
120 int get_missed_crc();
121 /// enable/disable tracking of cached crcs
122 void track_cached_crc(bool b);
124 /// count of calls to buffer::ptr::c_str()
125 int get_c_str_accesses();
126 /// enable/disable tracking of buffer::ptr::c_str() calls
127 void track_c_str(bool b);
// Forward declarations of the concrete raw-buffer implementations.
// Each class names a different backing-storage strategy; their
// definitions live in the corresponding .cc file, not in this header.
130 * an abstract raw buffer. with a reference count.
135 class raw_mmap_pages;
136 class raw_posix_aligned;
137 class raw_hack_aligned;
139 class raw_claimed_char;
141 class raw_unshareable; // diagnostic, unshareable char buffer
143 class raw_claim_buffer;
// XIO (Accelio RDMA messenger) backed buffer; only meaningful when
// built with HAVE_XIO.
147 class xio_msg_buffer;
// Factory functions producing reference-counted raw buffers.  The
// create_* family allocates new storage; the claim_* family takes
// ownership of caller-provided memory; copy() duplicates an existing
// byte range.  Definitions live in buffer.cc.
152 raw* copy(const char *c, unsigned len);
153 raw* create(unsigned len);
154 raw* create_in_mempool(unsigned len, int mempool);
155 raw* claim_char(unsigned len, char *buf);
156 raw* create_malloc(unsigned len);
157 raw* claim_malloc(unsigned len, char *buf);
// Wraps caller-owned static storage; the buffer is never freed.
158 raw* create_static(unsigned len, char *buf);
159 raw* create_aligned(unsigned len, unsigned align);
160 raw* create_aligned_in_mempool(unsigned len, unsigned align, int mempool);
161 raw* create_page_aligned(unsigned len);
162 raw* create_zero_copy(unsigned len, int fd, int64_t *offset);
163 raw* create_unshareable(unsigned len);
// (removed a redundant second declaration of create_static that
// duplicated the one above)
// Takes ownership of buf; `del` is invoked to release it on last unref.
165 raw* claim_buffer(unsigned len, char *buf, deleter del);
167 #if defined(HAVE_XIO)
168 raw* create_msg(unsigned len, char *buf, XioDispatchHook *m_hook);
172 * a buffer pointer. references (a subsequence of) a raw buffer.
174 class CEPH_BUFFER_API ptr {
// Nested forward iterator over the bytes of a single ptr.  Caches raw
// char pointers (start/pos/end_ptr) so per-byte access avoids repeated
// c_str() calls.  NOTE(review): excerpt is line-sampled; some member
// declarations and closing braces are not visible here.
182 const ptr *bp; ///< parent ptr
183 const char *start; ///< starting pointer into bp->c_str()
184 const char *pos; ///< pointer into bp->c_str()
185 const char *end_ptr; ///< pointer to bp->end_c_str()
186 bool deep; ///< if true, do not allow shallow ptr copies
188 iterator(const ptr *p, size_t offset, bool d)
190 start(p->c_str() + offset),
192 end_ptr(p->end_c_str()),
// Return the current position and advance by n bytes; throws
// end_of_buffer on overrun.
198 const char *get_pos_add(size_t n) {
202 throw end_of_buffer();
// Return a ptr covering the next len bytes and advance.  In deep mode
// the bytes are copied into a fresh buffer; otherwise a shallow
// sub-ptr sharing the parent raw buffer is returned.
206 ptr get_ptr(size_t len) {
208 return buffer::copy(get_pos_add(len), len);
210 size_t off = pos - bp->c_str();
213 throw end_of_buffer();
214 return ptr(*bp, off, len);
// Like get_ptr() but covers the len bytes *before* the current
// position; does not advance.
217 ptr get_preceding_ptr(size_t len) {
219 return buffer::copy(get_pos() - len, len);
221 size_t off = pos - bp->c_str();
222 return ptr(*bp, off - len, len);
// Advance without reading; throws end_of_buffer on overrun.
226 void advance(size_t len) {
229 throw end_of_buffer();
232 const char *get_pos() {
235 const char *get_end() {
// Offset of the current position from the start of the parent ptr.
239 size_t get_offset() {
244 return pos == end_ptr;
// ptr construction, accessors and mutators.  A ptr is a refcounted view
// (_raw, _off, _len) into a raw buffer; copies are shallow and share the
// underlying raw unless it is marked unshareable.
248 ptr() : _raw(0), _off(0), _len(0) {}
249 // cppcheck-suppress noExplicitConstructor
251 // cppcheck-suppress noExplicitConstructor
// Deep-copies d[0..l) into a newly created raw buffer.
253 ptr(const char *d, unsigned l);
255 ptr(ptr&& p) noexcept;
// Sub-view of p: offset o, length l, sharing p's raw buffer.
256 ptr(const ptr& p, unsigned o, unsigned l);
257 ptr& operator= (const ptr& p);
258 ptr& operator= (ptr&& p) noexcept;
263 bool have_raw() const { return _raw ? true:false; }
266 void swap(ptr& other);
// Replace an unshareable raw with a private shareable copy.
267 ptr& make_shareable();
// Byte iterators; begin_deep() forces copies when sub-ptrs are taken.
269 iterator begin(size_t offset=0) const {
270 return iterator(this, offset, false);
272 iterator begin_deep(size_t offset=0) const {
273 return iterator(this, offset, true);
277 bool at_buffer_head() const { return _off == 0; }
278 bool at_buffer_tail() const;
// Alignment test on the *data pointer* (align must be a power of two).
280 bool is_aligned(unsigned align) const {
281 return ((long)c_str() & (align-1)) == 0;
283 bool is_page_aligned() const { return is_aligned(CEPH_PAGE_SIZE); }
// Size test: true if length is a whole multiple of align.
284 bool is_n_align_sized(unsigned align) const
286 return (length() % align) == 0;
288 bool is_n_page_sized() const { return is_n_align_sized(CEPH_PAGE_SIZE); }
// True when this ptr views only part of its raw buffer.
289 bool is_partial() const {
290 return have_raw() && (start() > 0 || end() < raw_length());
293 int get_mempool() const;
294 void reassign_to_mempool(int pool);
295 void try_assign_to_mempool(int pool);
298 raw *get_raw() const { return _raw; }
299 const char *c_str() const;
301 const char *end_c_str() const;
303 unsigned length() const { return _len; }
304 unsigned offset() const { return _off; }
305 unsigned start() const { return _off; }
306 unsigned end() const { return _off + _len; }
// Bytes available in the raw buffer past this view's end.
307 unsigned unused_tail_length() const;
308 const char& operator[](unsigned n) const;
309 char& operator[](unsigned n);
// Accessors on the underlying raw buffer (ignore _off/_len).
311 const char *raw_c_str() const;
312 unsigned raw_length() const;
313 int raw_nref() const;
315 void copy_out(unsigned o, unsigned l, char *dest) const;
317 bool can_zero_copy() const;
318 int zero_copy_to_fd(int fd, int64_t *offset) const;
// Raw bytes not covered by this view (raw_length() - _len, presumably).
320 unsigned wasted() const;
322 int cmp(const ptr& o) const;
323 bool is_zero() const;
// Mutators: offsets/lengths are asserted against the raw buffer size.
326 void set_offset(unsigned o) {
327 assert(raw_length() >= o);
330 void set_length(unsigned l) {
331 assert(raw_length() >= l);
335 unsigned append(char c);
336 unsigned append(const char *p, unsigned l);
337 void copy_in(unsigned o, unsigned l, const char *src);
// crc_reset variants let callers skip invalidating cached CRCs.
338 void copy_in(unsigned o, unsigned l, const char *src, bool crc_reset);
340 void zero(bool crc_reset);
341 void zero(unsigned o, unsigned l);
342 void zero(unsigned o, unsigned l, bool crc_reset);
348 * list - the useful bit!
// A bufferlist: an ordered sequence of ptr views plus a cached total
// length.  Appends go into append_buffer until it fills.
351 class CEPH_BUFFER_API list {
353 std::list<ptr> _buffers;
355 unsigned _memcopy_count; // total bytes mem-copied by rebuild()
356 ptr append_buffer; // where i put small appends.
// Shared implementation for list::iterator (is_const=false) and
// list::const_iterator (is_const=true).  Tracks position both as a
// global byte offset (off) and as (current ptr, offset within it).
// NOTE(review): inheriting from std::iterator is deprecated in C++17;
// replace with explicit member typedefs when modernizing.
362 template <bool is_const>
363 class CEPH_BUFFER_API iterator_impl
364 : public std::iterator<std::forward_iterator_tag, char> {
// const-ness of the referenced list/ptrs is selected at compile time.
366 typedef typename std::conditional<is_const,
369 typedef typename std::conditional<is_const,
370 const std::list<ptr>,
371 std::list<ptr> >::type list_t;
372 typedef typename std::conditional<is_const,
373 typename std::list<ptr>::const_iterator,
374 typename std::list<ptr>::iterator>::type list_iter_t;
376 list_t* ls; // meh.. just here to avoid an extra pointer dereference..
377 unsigned off; // in bl
379 unsigned p_off; // in *p
380 friend class iterator_impl<true>;
383 // constructor. position.
385 : bl(0), ls(0), off(0), p_off(0) {}
386 iterator_impl(bl_t *l, unsigned o=0);
387 iterator_impl(bl_t *l, unsigned o, list_iter_t ip, unsigned po)
388 : bl(l), ls(&bl->_buffers), off(o), p(ip), p_off(po) {}
389 iterator_impl(const list::iterator& i);
391 /// get current iterator offset in buffer::list
392 unsigned get_off() const { return off; }
394 /// get number of bytes remaining from iterator position to the end of the buffer::list
395 unsigned get_remaining() const { return bl->length() - off; }
397 /// true if iterator is at the end of the buffer::list
399 return p == ls->end();
400 //return off == bl->length();
404 void seek(unsigned o);
405 char operator*() const;
406 iterator_impl& operator++();
407 ptr get_current_ptr() const;
409 bl_t& get_bl() const { return *bl; }
412 // note that these all _append_ to dest!
413 void copy(unsigned len, char *dest);
414 // deprecated, use copy_deep()
415 void copy(unsigned len, ptr &dest) __attribute__((deprecated));
416 void copy_deep(unsigned len, ptr &dest);
417 void copy_shallow(unsigned len, ptr &dest);
418 void copy(unsigned len, list &dest);
419 void copy(unsigned len, std::string &dest);
420 void copy_all(list &dest);
422 // get a pointer to the current iterator position, return the
423 // number of bytes we can read from that position (up to want),
424 // and advance the iterator by that amount.
425 size_t get_ptr_and_advance(size_t want, const char **p);
427 /// calculate crc from iterator position
428 uint32_t crc32c(size_t length, uint32_t crc);
// Equality compares identity of the underlying list plus byte offset.
430 friend bool operator==(const iterator_impl& lhs,
431 const iterator_impl& rhs) {
432 return &lhs.get_bl() == &rhs.get_bl() && lhs.get_off() == rhs.get_off();
434 friend bool operator!=(const iterator_impl& lhs,
435 const iterator_impl& rhs) {
436 return &lhs.get_bl() != &rhs.get_bl() || lhs.get_off() != rhs.get_off();
441 typedef iterator_impl<true> const_iterator;
// Mutable iterator; re-declares the navigation/copy interface of
// iterator_impl<false> and adds copy_in() mutators.
443 class CEPH_BUFFER_API iterator : public iterator_impl<false> {
445 iterator() = default;
446 iterator(bl_t *l, unsigned o=0);
447 iterator(bl_t *l, unsigned o, list_iter_t ip, unsigned po);
450 void seek(unsigned o);
451 using iterator_impl<false>::operator*;
453 iterator& operator++();
454 ptr get_current_ptr();
// These all _append_ to dest (same contract as iterator_impl).
457 void copy(unsigned len, char *dest);
458 // deprecated, use copy_deep()
459 void copy(unsigned len, ptr &dest) __attribute__((deprecated));
460 void copy_deep(unsigned len, ptr &dest);
461 void copy_shallow(unsigned len, ptr &dest);
462 void copy(unsigned len, list &dest);
463 void copy(unsigned len, std::string &dest);
464 void copy_all(list &dest);
// Overwrite bytes at the current position from src / another list.
467 void copy_in(unsigned len, const char *src);
468 void copy_in(unsigned len, const char *src, bool crc_reset);
469 void copy_in(unsigned len, const list& otherl);
471 bool operator==(const iterator& rhs) const {
472 return bl == rhs.bl && off == rhs.off;
474 bool operator!=(const iterator& rhs) const {
475 return bl != rhs.bl || off != rhs.off;
// Appender that hands out a contiguous write cursor (pos) into either a
// freshly allocated buffer (bp) or the list's append_buffer tail.
// Large or deep appends bypass the cursor and are counted in
// out_of_band_offset so get_logical_offset() stays correct.
// NOTE(review): excerpt is line-sampled; some branches/braces are not
// visible here.
479 class contiguous_appender {
485 /// running count of bytes appended that are not reflected by @pos
486 size_t out_of_band_offset = 0;
488 contiguous_appender(bufferlist *l, size_t len, bool d)
491 size_t unused = pbl->append_buffer.unused_tail_length();
493 // note: if len < the normal append_buffer size it *might*
494 // be better to allocate a normal-sized append_buffer and
495 // use part of it. however, that optimizes for the case of
496 // old-style types including new-style types. and in most
497 // such cases, this won't be the very first thing encoded to
498 // the list, so append_buffer will already be allocated.
499 // OTOH if everything is new-style, we *should* allocate
500 // only what we need and conserve memory.
501 bp = buffer::create(len)
504 pos = pbl->append_buffer.end_c_str();
// Publish everything written so far to the list and keep appending
// into the remainder of the same buffer.
508 void flush_and_continue() {
510 // we allocated a new buffer
511 size_t l = pos - bp.c_str();
512 pbl->append(bufferptr(bp, 0, l));
513 bp.set_length(bp.length() - l);
514 bp.set_offset(bp.offset() + l);
516 // we are using pbl's append_buffer
517 size_t l = pos - pbl->append_buffer.end_c_str();
519 pbl->append_buffer.set_length(pbl->append_buffer.length() + l);
520 pbl->append(pbl->append_buffer, pbl->append_buffer.end() - l, l);
521 pos = pbl->append_buffer.end_c_str();
// Destructor publishes any remaining unflushed bytes.
529 ~contiguous_appender() {
531 // we allocated a new buffer
532 bp.set_length(pos - bp.c_str());
533 pbl->append(std::move(bp));
535 // we are using pbl's append_buffer
536 size_t l = pos - pbl->append_buffer.end_c_str();
538 pbl->append_buffer.set_length(pbl->append_buffer.length() + l);
539 pbl->append(pbl->append_buffer, pbl->append_buffer.end() - l, l);
544 size_t get_out_of_band_offset() const {
545 return out_of_band_offset;
// Copy l bytes to the cursor (small copies are inlined up to 16 bytes
// by maybe_inline_memcpy).
547 void append(const char *p, size_t l) {
548 maybe_inline_memcpy(pos, p, l, 16);
551 char *get_pos_add(size_t len) {
// Append a bufferptr: small ones are copied through the cursor; large
// ones are flushed then appended out-of-band (zero-copy).
560 void append(const bufferptr& p) {
565 append(p.c_str(), p.length());
567 flush_and_continue();
569 out_of_band_offset += p.length();
// Append a bufferlist: copy per-ptr when shallow copies are not
// wanted, otherwise flush and splice the list in out-of-band.
572 void append(const bufferlist& l) {
577 for (const auto &p : l._buffers) {
578 append(p.c_str(), p.length());
581 flush_and_continue();
583 out_of_band_offset += l.length();
// Total bytes appended so far (cursor progress + out-of-band bytes).
587 size_t get_logical_offset() {
589 return out_of_band_offset + (pos - bp.c_str());
591 return out_of_band_offset + (pos - pbl->append_buffer.end_c_str());
596 contiguous_appender get_contiguous_appender(size_t len, bool deep=false) {
597 return contiguous_appender(this, len, deep);
// Appender that writes into page-aligned buffers of at least
// min_pages * CEPH_PAGE_SIZE bytes, appending filled spans to the list
// as it goes.  Useful for building lists destined for direct I/O.
600 class page_aligned_appender {
607 page_aligned_appender(list *l, unsigned min_pages)
609 min_alloc(min_pages * CEPH_PAGE_SIZE),
610 pos(nullptr), end(nullptr) {}
// Destructor appends whatever was written into the current buffer and
// trims the buffer view to the unwritten remainder.
615 ~page_aligned_appender() {
620 if (pos && pos != buffer.c_str()) {
621 size_t len = pos - buffer.c_str();
622 pbl->append(buffer, 0, len);
623 buffer.set_length(buffer.length() - len);
624 buffer.set_offset(buffer.offset() + len);
// Copy len bytes, allocating a new page-aligned buffer (rounded up to
// a whole number of pages, floored at min_alloc) whenever the current
// one runs out.
628 void append(const char *buf, size_t len) {
631 size_t alloc = (len + CEPH_PAGE_SIZE - 1) & CEPH_PAGE_MASK;
632 if (alloc < min_alloc) {
635 buffer = create_page_aligned(alloc);
636 pos = buffer.c_str();
637 end = buffer.end_c_str();
640 if (l > (size_t)(end - pos)) {
648 pbl->append(buffer, 0, buffer.length());
655 page_aligned_appender get_page_aligned_appender(unsigned min_pages=1) {
656 return page_aligned_appender(this, min_pages);
// last_p caches the most recent lookup position to speed up sequential
// operator[]/copy access; mutable so const accessors can update it.
660 mutable iterator last_p;
661 int zero_copy_to_fd(int fd) const;
665 list() : _len(0), _memcopy_count(0), last_p(this) {}
666 // cppcheck-suppress noExplicitConstructor
// Implicit conversion from a preallocation size is intentional here.
667 list(unsigned prealloc) : _len(0), _memcopy_count(0), last_p(this) {
// Copy is shallow: the ptr views are copied, the raw buffers shared.
671 list(const list& other) : _buffers(other._buffers), _len(other._len),
672 _memcopy_count(other._memcopy_count), last_p(this) {
676 list& operator= (const list& other) {
677 if (this != &other) {
678 _buffers = other._buffers;
685 list& operator= (list&& other) {
686 _buffers = std::move(other._buffers);
688 _memcopy_count = other._memcopy_count;
690 append_buffer.swap(other.append_buffer);
695 unsigned get_num_buffers() const { return _buffers.size(); }
696 const ptr& front() const { return _buffers.front(); }
697 const ptr& back() const { return _buffers.back(); }
699 int get_mempool() const;
700 void reassign_to_mempool(int pool);
701 void try_assign_to_mempool(int pool);
703 size_t get_append_buffer_unused_tail_length() const {
704 return append_buffer.unused_tail_length();
707 unsigned get_memcopy_count() const {return _memcopy_count; }
708 const std::list<ptr>& buffers() const { return _buffers; }
709 void swap(list& other);
// Returns the cached _len; the debug branch below re-sums the ptr
// lengths to verify the cache.
710 unsigned length() const {
712 // DEBUG: verify _len
714 for (std::list<ptr>::const_iterator it = _buffers.begin();
715 it != _buffers.end();
717 len += (*it).length();
// Deep byte-wise comparison (unlike operator== on iterators, this
// compares content, not identity).
724 bool contents_equal(buffer::list& other);
725 bool contents_equal(const buffer::list& other) const;
727 bool can_zero_copy() const;
728 bool is_provided_buffer(const char *dst) const;
729 bool is_aligned(unsigned align) const;
730 bool is_page_aligned() const;
731 bool is_n_align_sized(unsigned align) const;
732 bool is_n_page_sized() const;
733 bool is_aligned_size_and_memory(unsigned align_size,
734 unsigned align_memory) const;
736 bool is_zero() const;
// clear() also drops the small-append staging buffer.
744 append_buffer = ptr();
// push_front/push_back silently ignore zero-length ptrs so empty views
// never appear in _buffers.
746 void push_front(ptr& bp) {
747 if (bp.length() == 0)
749 _buffers.push_front(bp);
752 void push_front(ptr&& bp) {
753 if (bp.length() == 0)
756 _buffers.push_front(std::move(bp));
758 void push_front(raw *r) {
761 void push_back(const ptr& bp) {
762 if (bp.length() == 0)
764 _buffers.push_back(bp);
767 void push_back(ptr&& bp) {
768 if (bp.length() == 0)
771 _buffers.push_back(std::move(bp));
773 void push_back(raw *r) {
778 void zero(unsigned o, unsigned l);
780 bool is_contiguous() const;
// rebuild() coalesces the fragments into a single buffer (counted in
// _memcopy_count); the aligned variants only rebuild when needed and
// report whether a copy took place.
782 void rebuild(ptr& nb);
783 bool rebuild_aligned(unsigned align);
784 bool rebuild_aligned_size_and_memory(unsigned align_size,
785 unsigned align_memory);
786 bool rebuild_page_aligned();
788 void reserve(size_t prealloc);
790 // assignment-op with move semantics
791 const static unsigned int CLAIM_DEFAULT = 0;
792 const static unsigned int CLAIM_ALLOW_NONSHAREABLE = 1;
// claim* move the contents of bl into this list, leaving bl empty.
794 void claim(list& bl, unsigned int flags = CLAIM_DEFAULT);
795 void claim_append(list& bl, unsigned int flags = CLAIM_DEFAULT);
796 void claim_prepend(list& bl, unsigned int flags = CLAIM_DEFAULT);
797 // only for bl is bufferlist::page_aligned_appender
798 void claim_append_piecewise(list& bl);
800 // clone non-shareable buffers (make shareable)
801 void make_shareable() {
802 std::list<buffer::ptr>::iterator pb;
803 for (pb = _buffers.begin(); pb != _buffers.end(); ++pb) {
804 (void) pb->make_shareable();
808 // copy with explicit volatile-sharing semantics
809 void share(const list& bl)
813 std::list<buffer::ptr>::const_iterator pb;
814 for (pb = bl._buffers.begin(); pb != bl._buffers.end(); ++pb) {
// begin()/end() for both mutable and const iteration.
821 return iterator(this, 0);
824 return iterator(this, _len, _buffers.end(), 0);
827 const_iterator begin() const {
828 return const_iterator(this, 0);
830 const_iterator end() const {
831 return const_iterator(this, _len, _buffers.end(), 0);
835 // **** WARNING: these are horribly inefficient for large bufferlists. ****
836 void copy(unsigned off, unsigned len, char *dest) const;
837 void copy(unsigned off, unsigned len, list &dest) const;
838 void copy(unsigned off, unsigned len, std::string& dest) const;
839 void copy_in(unsigned off, unsigned len, const char *src);
840 void copy_in(unsigned off, unsigned len, const char *src, bool crc_reset);
841 void copy_in(unsigned off, unsigned len, const list& src);
// Append family: raw bytes, strings, ptrs (copied or moved), other
// lists, streams, or runs of zero bytes.
844 void append(const char *data, unsigned len);
845 void append(const std::string& s) {
846 append(s.data(), s.length());
848 void append(const ptr& bp);
849 void append(ptr&& bp);
850 void append(const ptr& bp, unsigned off, unsigned len);
851 void append(const list& bl);
852 void append(std::istream& in);
853 void append_zero(unsigned len);
854 void prepend_zero(unsigned len);
859 const char& operator[](unsigned n) const;
861 std::string to_str() const;
// Make this list a shallow view of [off, off+len) of other.
863 void substr_of(const list& other, unsigned off, unsigned len);
865 /// return a pointer to a contiguous extent of the buffer,
866 /// reallocating as needed
867 char *get_contiguous(unsigned off, ///< offset
868 unsigned len); ///< length
// Remove [off, off+len); removed bytes go to *claim_by if non-null.
871 void splice(unsigned off, unsigned len, list *claim_by=0 /*, bufferlist& replace_with */);
872 void write(int off, int len, std::ostream& out) const;
874 void encode_base64(list& o);
875 void decode_base64(list& o);
877 void write_stream(std::ostream &out) const;
878 void hexdump(std::ostream &out, bool trailing_newline = true) const;
// File/fd I/O helpers; *_zero_copy variants require can_zero_copy().
879 int read_file(const char *fn, std::string *error);
880 ssize_t read_fd(int fd, size_t len);
881 int read_fd_zero_copy(int fd, size_t len);
882 int write_file(const char *fn, int mode=0644);
883 int write_fd(int fd) const;
884 int write_fd(int fd, uint64_t offset) const;
885 int write_fd_zero_copy(int fd) const;
// Fill an iovec-style vector with one entry per fragment (for
// readv/writev); requires at most IOV_MAX fragments.
886 template<typename VectorT>
887 void prepare_iov(VectorT *piov) const {
888 assert(_buffers.size() <= IOV_MAX);
889 piov->resize(_buffers.size());
891 for (auto& p : _buffers) {
892 (*piov)[n].iov_base = (void *)p.c_str();
893 (*piov)[n].iov_len = p.length();
897 uint32_t crc32c(uint32_t crc) const;
898 void invalidate_crc();
900 // These functions return a bufferlist with a pointer to a single
901 // static buffer. They /must/ not outlive the memory they
903 static list static_from_mem(char* c, size_t l);
904 static list static_from_cstring(char* c);
905 static list static_from_string(std::string& s);
909 * efficient hash of one or more bufferlists
// Incremental CRC32C accumulator: feed bufferlists via update(), read
// the running crc out afterwards.
917 // cppcheck-suppress noExplicitConstructor
918 hash(uint32_t init) : crc(init) { }
920 void update(const buffer::list& bl) {
921 crc = bl.crc32c(crc);
// Lexicographic byte-wise comparison of two bufferlists.  Each loop
// walks position p until the lists diverge or one ends; per-byte
// operator[] makes these O(n) calls into the list (slow on large
// lists, acceptable for the comparison use case).
// NOTE(review): parameters are non-const because operator[] here is
// only available on non-const lists.
929 inline bool operator>(bufferlist& l, bufferlist& r) {
930 for (unsigned p = 0; ; p++) {
// r exhausted while l still has bytes: l is the longer prefix-equal
// list, hence greater.
931 if (l.length() > p && r.length() == p) return true;
932 if (l.length() == p) return false;
933 if (l[p] > r[p]) return true;
934 if (l[p] < r[p]) return false;
937 inline bool operator>=(bufferlist& l, bufferlist& r) {
938 for (unsigned p = 0; ; p++) {
939 if (l.length() > p && r.length() == p) return true;
// both exhausted simultaneously: equal, so >= holds.
940 if (r.length() == p && l.length() == p) return true;
941 if (l.length() == p && r.length() > p) return false;
942 if (l[p] > r[p]) return true;
943 if (l[p] < r[p]) return false;
// Equality: lengths must match, then bytes compared front to back.
947 inline bool operator==(const bufferlist &l, const bufferlist &r) {
948 if (l.length() != r.length())
950 for (unsigned p = 0; p < l.length(); p++) {
// < and <= are defined in terms of the operators above.
956 inline bool operator<(bufferlist& l, bufferlist& r) {
959 inline bool operator<=(bufferlist& l, bufferlist& r) {
// Debug/stream output for the buffer types; definitions live in
// buffer.cc.
964 std::ostream& operator<<(std::ostream& out, const buffer::ptr& bp);
966 std::ostream& operator<<(std::ostream& out, const raw &r);
968 std::ostream& operator<<(std::ostream& out, const buffer::list& bl);
970 std::ostream& operator<<(std::ostream& out, const buffer::error& e);
// Feed a bufferlist into a bufferhash accumulator via stream syntax.
972 inline bufferhash& operator<<(bufferhash& l, const bufferlist &r) {
// XIO (Accelio) integration hook: look up the registered-memory region
// backing a buffer ptr; only compiled with HAVE_XIO.
979 #if defined(HAVE_XIO)
980 xio_reg_mem* get_xio_mp(const buffer::ptr& bp);