X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Ftools%2FRadosDump.h;fp=src%2Fceph%2Fsrc%2Ftools%2FRadosDump.h;h=6ad43f7aa63b0a48791ce89e3a39f8bb68080a8d;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/tools/RadosDump.h b/src/ceph/src/tools/RadosDump.h new file mode 100644 index 0000000..6ad43f7 --- /dev/null +++ b/src/ceph/src/tools/RadosDump.h @@ -0,0 +1,407 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2015 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef RADOS_DUMP_H_ +#define RADOS_DUMP_H_ + +#include + +#include "include/buffer.h" +#include "include/encoding.h" + +#include "osd/osd_types.h" +#include "osd/OSDMap.h" + +typedef uint8_t sectiontype_t; +typedef uint32_t mymagic_t; +typedef int64_t mysize_t; + +enum { + TYPE_NONE = 0, + TYPE_PG_BEGIN, + TYPE_PG_END, + TYPE_OBJECT_BEGIN, + TYPE_OBJECT_END, + TYPE_DATA, + TYPE_ATTRS, + TYPE_OMAP_HDR, + TYPE_OMAP, + TYPE_PG_METADATA, + TYPE_POOL_BEGIN, + TYPE_POOL_END, + END_OF_TYPES, //Keep at the end +}; + +const uint16_t shortmagic = 0xffce; //goes into stream as "ceff" +//endmagic goes into stream as "ceff ffec" +const mymagic_t endmagic = (0xecff << 16) | shortmagic; + +//The first FIXED_LENGTH bytes are a fixed +//portion of the export output. This includes the overall +//version number, and size of header and footer. +//THIS STRUCTURE CAN ONLY BE APPENDED TO. If it needs to expand, +//the version can be bumped and then anything +//can be added to the export format. +struct super_header { + static const uint32_t super_magic = (shortmagic << 16) | shortmagic; + // ver = 1, Initial version + // ver = 2, Add OSDSuperblock to pg_begin + static const uint32_t super_ver = 2; + static const uint32_t FIXED_LENGTH = 16; + uint32_t magic; + uint32_t version; + uint32_t header_size; + uint32_t footer_size; + + super_header() : magic(0), version(0), header_size(0), footer_size(0) { } + + void encode(bufferlist& bl) const { + ::encode(magic, bl); + ::encode(version, bl); + ::encode(header_size, bl); + ::encode(footer_size, bl); + } + void decode(bufferlist::iterator& bl) { + ::decode(magic, bl); + ::decode(version, bl); + ::decode(header_size, bl); + ::decode(footer_size, bl); + } +}; + +struct header { + sectiontype_t type; + mysize_t size; + header(sectiontype_t type, mysize_t size) : + type(type), size(size) { } + header(): type(0), size(0) { } + + void encode(bufferlist& bl) const { + uint32_t debug_type = (type << 24) | (type << 16) | shortmagic; + ENCODE_START(1, 1, bl); + ::encode(debug_type, bl); + ::encode(size, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + uint32_t debug_type; + DECODE_START(1, bl); + ::decode(debug_type, bl); + type = debug_type >> 24; + ::decode(size, bl); + DECODE_FINISH(bl); + } +}; + +struct footer { + mymagic_t magic; + footer() : magic(endmagic) { } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(magic, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(magic, bl); + DECODE_FINISH(bl); + } +}; + +struct pg_begin { + spg_t pgid; + OSDSuperblock superblock; + + pg_begin(spg_t pg, const OSDSuperblock& sb): + pgid(pg), superblock(sb) { } + pg_begin() { } + + void encode(bufferlist& bl) const { + // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then + // shard will be NO_SHARD for a replicated pool. This means + // that we allow the decode by struct_v 2. + ENCODE_START(3, 2, bl); + ::encode(pgid.pgid, bl); + ::encode(superblock, bl); + ::encode(pgid.shard, bl); + ENCODE_FINISH(bl); + } + // NOTE: New super_ver prevents decode from ver 1 + void decode(bufferlist::iterator& bl) { + DECODE_START(3, bl); + ::decode(pgid.pgid, bl); + if (struct_v > 1) { + ::decode(superblock, bl); + } + if (struct_v > 2) { + ::decode(pgid.shard, bl); + } else { + pgid.shard = shard_id_t::NO_SHARD; + } + DECODE_FINISH(bl); + } +}; + +struct object_begin { + ghobject_t hoid; + + // Duplicate what is in the OI_ATTR so we have it at the start + // of object processing. + object_info_t oi; + + explicit object_begin(const ghobject_t &hoid): hoid(hoid) { } + object_begin() { } + + // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then + // generation will be NO_GEN, shard_id will be NO_SHARD for a replicated + // pool. This means we will allow the decode by struct_v 1. + void encode(bufferlist& bl) const { + ENCODE_START(3, 1, bl); + ::encode(hoid.hobj, bl); + ::encode(hoid.generation, bl); + ::encode(hoid.shard_id, bl); + ::encode(oi, bl, -1); /* FIXME: we always encode with full features */ + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(3, bl); + ::decode(hoid.hobj, bl); + if (struct_v > 1) { + ::decode(hoid.generation, bl); + ::decode(hoid.shard_id, bl); + } else { + hoid.generation = ghobject_t::NO_GEN; + hoid.shard_id = shard_id_t::NO_SHARD; + } + if (struct_v > 2) { + ::decode(oi, bl); + } + DECODE_FINISH(bl); + } +}; + +struct data_section { + uint64_t offset; + uint64_t len; + bufferlist databl; + data_section(uint64_t offset, uint64_t len, bufferlist bl): + offset(offset), len(len), databl(bl) { } + data_section(): offset(0), len(0) { } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(offset, bl); + ::encode(len, bl); + ::encode(databl, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(offset, bl); + ::decode(len, bl); + ::decode(databl, bl); + DECODE_FINISH(bl); + } +}; + +struct attr_section { + map data; + explicit attr_section(const map &data) : data(data) { } + explicit attr_section(map &data_) + { + for (std::map::iterator i = data_.begin(); + i != data_.end(); ++i) { + bufferlist bl; + bl.push_front(i->second); + data[i->first] = bl; + } + } + + attr_section() { } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(data, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(data, bl); + DECODE_FINISH(bl); + } +}; + +struct omap_hdr_section { + bufferlist hdr; + explicit omap_hdr_section(bufferlist hdr) : hdr(hdr) { } + omap_hdr_section() { } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(hdr, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(hdr, bl); + DECODE_FINISH(bl); + } +}; + +struct omap_section { + map omap; + explicit omap_section(const map &omap) : + omap(omap) { } + omap_section() { } + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(omap, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(omap, bl); + DECODE_FINISH(bl); + } +}; + +struct metadata_section { + // struct_ver is the on-disk version of original pg + __u8 struct_ver; // for reference + epoch_t map_epoch; + pg_info_t info; + pg_log_t log; + PastIntervals past_intervals; + OSDMap osdmap; + bufferlist osdmap_bl; // Used in lieu of encoding osdmap due to crc checking + map divergent_priors; + pg_missing_t missing; + + metadata_section( + __u8 struct_ver, + epoch_t map_epoch, + const pg_info_t &info, + const pg_log_t &log, + const PastIntervals &past_intervals, + const pg_missing_t &missing) + : struct_ver(struct_ver), + map_epoch(map_epoch), + info(info), + log(log), + past_intervals(past_intervals), + missing(missing) {} + metadata_section() + : struct_ver(0), + map_epoch(0) { } + + void encode(bufferlist& bl) const { + ENCODE_START(6, 6, bl); + ::encode(struct_ver, bl); + ::encode(map_epoch, bl); + ::encode(info, bl); + ::encode(log, bl); + ::encode(past_intervals, bl); + // Equivalent to osdmap.encode(bl, features); but + // preserving exact layout for CRC checking. + bl.append(osdmap_bl); + ::encode(divergent_priors, bl); + ::encode(missing, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator& bl) { + DECODE_START(6, bl); + ::decode(struct_ver, bl); + ::decode(map_epoch, bl); + ::decode(info, bl); + ::decode(log, bl); + if (struct_v >= 6) { + ::decode(past_intervals, bl); + } else if (struct_v > 1) { + past_intervals.decode_classic(bl); + } else { + cout << "NOTICE: Older export without past_intervals" << std::endl; + } + if (struct_v > 2) { + osdmap.decode(bl); + } else { + cout << "WARNING: Older export without OSDMap information" << std::endl; + } + if (struct_v > 3) { + ::decode(divergent_priors, bl); + } + if (struct_v > 4) { + ::decode(missing, bl); + } + DECODE_FINISH(bl); + } +}; + +/** + * Superclass for classes that will need to handle a serialized RADOS + * dump. Requires that the serialized dump be opened with a known FD. + */ +class RadosDump +{ + protected: + int file_fd; + super_header sh; + bool dry_run; + + public: + RadosDump(int file_fd_, bool dry_run_) + : file_fd(file_fd_), dry_run(dry_run_) + {} + + int read_super(); + int get_header(header *h); + int get_footer(footer *f); + int read_section(sectiontype_t *type, bufferlist *bl); + int skip_object(bufferlist &bl); + void write_super(); + + // Define this in .h because it's templated + template + int write_section(sectiontype_t type, const T& obj, int fd) { + if (dry_run) + return 0; + bufferlist blhdr, bl, blftr; + obj.encode(bl); + header hdr(type, bl.length()); + hdr.encode(blhdr); + footer ft; + ft.encode(blftr); + + int ret = blhdr.write_fd(fd); + if (ret) return ret; + ret = bl.write_fd(fd); + if (ret) return ret; + ret = blftr.write_fd(fd); + return ret; + } + + int write_simple(sectiontype_t type, int fd) + { + if (dry_run) + return 0; + bufferlist hbl; + + header hdr(type, 0); + hdr.encode(hbl); + return hbl.write_fd(fd); + } +}; + +#endif