X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Flibrbd%2Finternal.cc;fp=src%2Fceph%2Fsrc%2Flibrbd%2Finternal.cc;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=acbe336629ccd4c52aee6ed94e4e06a533865b30;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git diff --git a/src/ceph/src/librbd/internal.cc b/src/ceph/src/librbd/internal.cc deleted file mode 100644 index acbe336..0000000 --- a/src/ceph/src/librbd/internal.cc +++ /dev/null @@ -1,2360 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#include "include/int_types.h" - -#include -#include - -#include "include/types.h" -#include "include/uuid.h" -#include "common/ceph_context.h" -#include "common/dout.h" -#include "common/errno.h" -#include "common/Throttle.h" -#include "common/event_socket.h" -#include "cls/lock/cls_lock_client.h" -#include "include/stringify.h" - -#include "cls/rbd/cls_rbd.h" -#include "cls/rbd/cls_rbd_types.h" -#include "cls/rbd/cls_rbd_client.h" -#include "cls/journal/cls_journal_types.h" -#include "cls/journal/cls_journal_client.h" - -#include "librbd/ExclusiveLock.h" -#include "librbd/ImageCtx.h" -#include "librbd/ImageState.h" -#include "librbd/internal.h" -#include "librbd/Journal.h" -#include "librbd/ObjectMap.h" -#include "librbd/Operations.h" -#include "librbd/Types.h" -#include "librbd/Utils.h" -#include "librbd/api/Image.h" -#include "librbd/exclusive_lock/AutomaticPolicy.h" -#include "librbd/exclusive_lock/StandardPolicy.h" -#include "librbd/image/CloneRequest.h" -#include "librbd/image/CreateRequest.h" -#include "librbd/image/RemoveRequest.h" -#include "librbd/io/AioCompletion.h" -#include "librbd/io/ImageRequest.h" -#include "librbd/io/ImageRequestWQ.h" -#include "librbd/io/ObjectRequest.h" -#include "librbd/io/ReadResult.h" -#include "librbd/journal/Types.h" -#include "librbd/managed_lock/Types.h" -#include "librbd/mirror/EnableRequest.h" -#include "librbd/operation/TrimRequest.h" - -#include "journal/Journaler.h" - -#include -#include -#include "include/assert.h" - -#define dout_subsys ceph_subsys_rbd -#undef dout_prefix -#define dout_prefix *_dout << "librbd: " - -#define rbd_howmany(x, y) (((x) + (y) - 1) / (y)) - -using std::map; -using std::pair; -using std::set; -using std::string; -using std::vector; -// list binds to list() here, so std::list is explicitly used below - -using ceph::bufferlist; -using librados::snap_t; -using librados::IoCtx; -using librados::Rados; - -namespace librbd { - -namespace { - -int validate_pool(IoCtx &io_ctx, CephContext *cct) { - if (!cct->_conf->get_val("rbd_validate_pool")) { - return 0; - } - - int r = io_ctx.stat(RBD_DIRECTORY, NULL, NULL); - if (r == 0) { - return 0; - } else if (r < 0 && r != -ENOENT) { - lderr(cct) << "failed to stat RBD directory: " << cpp_strerror(r) << dendl; - return r; - } - - // allocate a self-managed snapshot id if this a new pool to force - // self-managed snapshot mode - uint64_t snap_id; - r = io_ctx.selfmanaged_snap_create(&snap_id); - if (r == -EINVAL) { - lderr(cct) << "pool not configured for self-managed RBD snapshot support" - << dendl; - return r; - } else if (r < 0) { - lderr(cct) << "failed to allocate self-managed snapshot: " - << cpp_strerror(r) << dendl; - return r; - } - - r = io_ctx.selfmanaged_snap_remove(snap_id); - if (r < 0) { - lderr(cct) << "failed to release self-managed snapshot " << snap_id - << ": " << cpp_strerror(r) << dendl; - } - return 0; -} - - -} // anonymous namespace - - int detect_format(IoCtx &io_ctx, const string &name, - bool *old_format, uint64_t *size) - { - CephContext *cct = (CephContext *)io_ctx.cct(); - if (old_format) - *old_format = true; - int r = io_ctx.stat(util::old_header_name(name), size, NULL); - if (r == -ENOENT) { - if (old_format) - *old_format = false; - r = io_ctx.stat(util::id_obj_name(name), size, NULL); - if (r < 0) - return r; - } else if (r < 0) { - return r; - } - - ldout(cct, 20) << "detect format of " << name << " : " - << (old_format ? (*old_format ? "old" : "new") : - "don't care") << dendl; - return 0; - } - - bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap) - { - return (parent_pool_id != -1 && off <= overlap); - } - - void init_rbd_header(struct rbd_obj_header_ondisk& ondisk, - uint64_t size, int order, uint64_t bid) - { - uint32_t hi = bid >> 32; - uint32_t lo = bid & 0xFFFFFFFF; - uint32_t extra = rand() % 0xFFFFFFFF; - memset(&ondisk, 0, sizeof(ondisk)); - - memcpy(&ondisk.text, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT)); - memcpy(&ondisk.signature, RBD_HEADER_SIGNATURE, - sizeof(RBD_HEADER_SIGNATURE)); - memcpy(&ondisk.version, RBD_HEADER_VERSION, sizeof(RBD_HEADER_VERSION)); - - snprintf(ondisk.block_name, sizeof(ondisk.block_name), "rb.%x.%x.%x", - hi, lo, extra); - - ondisk.image_size = size; - ondisk.options.order = order; - ondisk.options.crypt_type = RBD_CRYPT_NONE; - ondisk.options.comp_type = RBD_COMP_NONE; - ondisk.snap_seq = 0; - ondisk.snap_count = 0; - ondisk.reserved = 0; - ondisk.snap_names_len = 0; - } - - void image_info(ImageCtx *ictx, image_info_t& info, size_t infosize) - { - int obj_order = ictx->order; - ictx->snap_lock.get_read(); - info.size = ictx->get_image_size(ictx->snap_id); - ictx->snap_lock.put_read(); - info.obj_size = 1ULL << obj_order; - info.num_objs = Striper::get_num_objects(ictx->layout, info.size); - info.order = obj_order; - strncpy(info.block_name_prefix, ictx->object_prefix.c_str(), - RBD_MAX_BLOCK_NAME_SIZE); - info.block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE - 1] = '\0'; - - // clear deprecated fields - info.parent_pool = -1L; - info.parent_name[0] = '\0'; - } - - uint64_t oid_to_object_no(const string& oid, const string& object_prefix) - { - istringstream iss(oid); - // skip object prefix and separator - iss.ignore(object_prefix.length() + 1); - uint64_t num; - iss >> std::hex >> num; - return num; - } - - void trim_image(ImageCtx *ictx, uint64_t newsize, ProgressContext& prog_ctx) - { - assert(ictx->owner_lock.is_locked()); - assert(ictx->exclusive_lock == nullptr || - ictx->exclusive_lock->is_lock_owner()); - - C_SaferCond ctx; - ictx->snap_lock.get_read(); - operation::TrimRequest<> *req = operation::TrimRequest<>::create( - *ictx, &ctx, ictx->size, newsize, prog_ctx); - ictx->snap_lock.put_read(); - req->send(); - - int r = ctx.wait(); - if (r < 0) { - lderr(ictx->cct) << "warning: failed to remove some object(s): " - << cpp_strerror(r) << dendl; - } - } - - int read_header_bl(IoCtx& io_ctx, const string& header_oid, - bufferlist& header, uint64_t *ver) - { - int r; - uint64_t off = 0; -#define READ_SIZE 4096 - do { - bufferlist bl; - r = io_ctx.read(header_oid, bl, READ_SIZE, off); - if (r < 0) - return r; - header.claim_append(bl); - off += r; - } while (r == READ_SIZE); - - if (header.length() < sizeof(RBD_HEADER_TEXT) || - memcmp(RBD_HEADER_TEXT, header.c_str(), sizeof(RBD_HEADER_TEXT))) { - CephContext *cct = (CephContext *)io_ctx.cct(); - lderr(cct) << "unrecognized header format" << dendl; - return -ENXIO; - } - - if (ver) - *ver = io_ctx.get_last_version(); - - return 0; - } - - int read_header(IoCtx& io_ctx, const string& header_oid, - struct rbd_obj_header_ondisk *header, uint64_t *ver) - { - bufferlist header_bl; - int r = read_header_bl(io_ctx, header_oid, header_bl, ver); - if (r < 0) - return r; - if (header_bl.length() < (int)sizeof(*header)) - return -EIO; - memcpy(header, header_bl.c_str(), sizeof(*header)); - - return 0; - } - - int tmap_set(IoCtx& io_ctx, const string& imgname) - { - bufferlist cmdbl, emptybl; - __u8 c = CEPH_OSD_TMAP_SET; - ::encode(c, cmdbl); - ::encode(imgname, cmdbl); - ::encode(emptybl, cmdbl); - return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl); - } - - int tmap_rm(IoCtx& io_ctx, const string& imgname) - { - bufferlist cmdbl; - __u8 c = CEPH_OSD_TMAP_RM; - ::encode(c, cmdbl); - ::encode(imgname, cmdbl); - return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl); - } - - typedef boost::variant image_option_value_t; - typedef std::map image_options_t; - typedef std::shared_ptr image_options_ref; - - enum image_option_type_t { - STR, - UINT64, - }; - - const std::map IMAGE_OPTIONS_TYPE_MAPPING = { - {RBD_IMAGE_OPTION_FORMAT, UINT64}, - {RBD_IMAGE_OPTION_FEATURES, UINT64}, - {RBD_IMAGE_OPTION_ORDER, UINT64}, - {RBD_IMAGE_OPTION_STRIPE_UNIT, UINT64}, - {RBD_IMAGE_OPTION_STRIPE_COUNT, UINT64}, - {RBD_IMAGE_OPTION_JOURNAL_ORDER, UINT64}, - {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH, UINT64}, - {RBD_IMAGE_OPTION_JOURNAL_POOL, STR}, - {RBD_IMAGE_OPTION_FEATURES_SET, UINT64}, - {RBD_IMAGE_OPTION_FEATURES_CLEAR, UINT64}, - {RBD_IMAGE_OPTION_DATA_POOL, STR}, - }; - - std::string image_option_name(int optname) { - switch (optname) { - case RBD_IMAGE_OPTION_FORMAT: - return "format"; - case RBD_IMAGE_OPTION_FEATURES: - return "features"; - case RBD_IMAGE_OPTION_ORDER: - return "order"; - case RBD_IMAGE_OPTION_STRIPE_UNIT: - return "stripe_unit"; - case RBD_IMAGE_OPTION_STRIPE_COUNT: - return "stripe_count"; - case RBD_IMAGE_OPTION_JOURNAL_ORDER: - return "journal_order"; - case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH: - return "journal_splay_width"; - case RBD_IMAGE_OPTION_JOURNAL_POOL: - return "journal_pool"; - case RBD_IMAGE_OPTION_FEATURES_SET: - return "features_set"; - case RBD_IMAGE_OPTION_FEATURES_CLEAR: - return "features_clear"; - case RBD_IMAGE_OPTION_DATA_POOL: - return "data_pool"; - default: - return "unknown (" + stringify(optname) + ")"; - } - } - - std::ostream &operator<<(std::ostream &os, const ImageOptions &opts) { - os << "["; - - const char *delimiter = ""; - for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) { - if (i.second == STR) { - std::string val; - if (opts.get(i.first, &val) == 0) { - os << delimiter << image_option_name(i.first) << "=" << val; - delimiter = ", "; - } - } else if (i.second == UINT64) { - uint64_t val; - if (opts.get(i.first, &val) == 0) { - os << delimiter << image_option_name(i.first) << "=" << val; - delimiter = ", "; - } - } - } - - os << "]"; - - return os; - } - - void image_options_create(rbd_image_options_t* opts) - { - image_options_ref* opts_ = new image_options_ref(new image_options_t()); - - *opts = static_cast(opts_); - } - - void image_options_create_ref(rbd_image_options_t* opts, - rbd_image_options_t orig) - { - image_options_ref* orig_ = static_cast(orig); - image_options_ref* opts_ = new image_options_ref(*orig_); - - *opts = static_cast(opts_); - } - - void image_options_copy(rbd_image_options_t* opts, - const ImageOptions &orig) - { - image_options_ref* opts_ = new image_options_ref(new image_options_t()); - - *opts = static_cast(opts_); - - std::string str_val; - uint64_t uint64_val; - for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) { - switch (i.second) { - case STR: - if (orig.get(i.first, &str_val) == 0) { - image_options_set(*opts, i.first, str_val); - } - continue; - case UINT64: - if (orig.get(i.first, &uint64_val) == 0) { - image_options_set(*opts, i.first, uint64_val); - } - continue; - } - } - } - - void image_options_destroy(rbd_image_options_t opts) - { - image_options_ref* opts_ = static_cast(opts); - - delete opts_; - } - - int image_options_set(rbd_image_options_t opts, int optname, - const std::string& optval) - { - image_options_ref* opts_ = static_cast(opts); - - std::map::const_iterator i = - IMAGE_OPTIONS_TYPE_MAPPING.find(optname); - - if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) { - return -EINVAL; - } - - (*opts_->get())[optname] = optval; - return 0; - } - - int image_options_set(rbd_image_options_t opts, int optname, uint64_t optval) - { - image_options_ref* opts_ = static_cast(opts); - - std::map::const_iterator i = - IMAGE_OPTIONS_TYPE_MAPPING.find(optname); - - if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) { - return -EINVAL; - } - - (*opts_->get())[optname] = optval; - return 0; - } - - int image_options_get(rbd_image_options_t opts, int optname, - std::string* optval) - { - image_options_ref* opts_ = static_cast(opts); - - std::map::const_iterator i = - IMAGE_OPTIONS_TYPE_MAPPING.find(optname); - - if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) { - return -EINVAL; - } - - image_options_t::const_iterator j = (*opts_)->find(optname); - - if (j == (*opts_)->end()) { - return -ENOENT; - } - - *optval = boost::get(j->second); - return 0; - } - - int image_options_get(rbd_image_options_t opts, int optname, uint64_t* optval) - { - image_options_ref* opts_ = static_cast(opts); - - std::map::const_iterator i = - IMAGE_OPTIONS_TYPE_MAPPING.find(optname); - - if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) { - return -EINVAL; - } - - image_options_t::const_iterator j = (*opts_)->find(optname); - - if (j == (*opts_)->end()) { - return -ENOENT; - } - - *optval = boost::get(j->second); - return 0; - } - - int image_options_is_set(rbd_image_options_t opts, int optname, - bool* is_set) - { - if (IMAGE_OPTIONS_TYPE_MAPPING.find(optname) == - IMAGE_OPTIONS_TYPE_MAPPING.end()) { - return -EINVAL; - } - - image_options_ref* opts_ = static_cast(opts); - *is_set = ((*opts_)->find(optname) != (*opts_)->end()); - return 0; - } - - int image_options_unset(rbd_image_options_t opts, int optname) - { - image_options_ref* opts_ = static_cast(opts); - - std::map::const_iterator i = - IMAGE_OPTIONS_TYPE_MAPPING.find(optname); - - if (i == IMAGE_OPTIONS_TYPE_MAPPING.end()) { - assert((*opts_)->find(optname) == (*opts_)->end()); - return -EINVAL; - } - - image_options_t::const_iterator j = (*opts_)->find(optname); - - if (j == (*opts_)->end()) { - return -ENOENT; - } - - (*opts_)->erase(j); - return 0; - } - - void image_options_clear(rbd_image_options_t opts) - { - image_options_ref* opts_ = static_cast(opts); - - (*opts_)->clear(); - } - - bool image_options_is_empty(rbd_image_options_t opts) - { - image_options_ref* opts_ = static_cast(opts); - - return (*opts_)->empty(); - } - - int list(IoCtx& io_ctx, vector& names) - { - CephContext *cct = (CephContext *)io_ctx.cct(); - ldout(cct, 20) << "list " << &io_ctx << dendl; - - bufferlist bl; - int r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0); - if (r < 0) { - if (r == -ENOENT) { - r = 0; - } - return r; - } - - // old format images are in a tmap - if (bl.length()) { - bufferlist::iterator p = bl.begin(); - bufferlist header; - map m; - ::decode(header, p); - ::decode(m, p); - for (map::iterator q = m.begin(); q != m.end(); ++q) { - names.push_back(q->first); - } - } - - map images; - r = api::Image<>::list_images(io_ctx, &images); - if (r < 0) { - lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl; - return r; - } - for (const auto& img_pair : images) { - names.push_back(img_pair.first); - } - - return 0; - } - - int flatten_children(ImageCtx *ictx, const char* snap_name, - ProgressContext& pctx) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << "children flatten " << ictx->name << dendl; - - RWLock::RLocker l(ictx->snap_lock); - snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name); - ParentSpec parent_spec(ictx->md_ctx.get_id(), ictx->id, snap_id); - map< pair, set > image_info; - - int r = api::Image<>::list_children(ictx, parent_spec, &image_info); - if (r < 0) { - return r; - } - - size_t size = image_info.size(); - if (size == 0) - return 0; - - size_t i = 0; - Rados rados(ictx->md_ctx); - for ( auto &info : image_info){ - string pool = info.first.second; - IoCtx ioctx; - r = rados.ioctx_create2(info.first.first, ioctx); - if (r < 0) { - lderr(cct) << "Error accessing child image pool " << pool - << dendl; - return r; - } - - for (auto &id_it : info.second) { - ImageCtx *imctx = new ImageCtx("", id_it, NULL, ioctx, false); - int r = imctx->state->open(false); - if (r < 0) { - lderr(cct) << "error opening image: " - << cpp_strerror(r) << dendl; - return r; - } - - librbd::NoOpProgressContext prog_ctx; - r = imctx->operations->flatten(prog_ctx); - if (r < 0) { - lderr(cct) << "error flattening image: " << pool << "/" << id_it - << cpp_strerror(r) << dendl; - imctx->state->close(); - return r; - } - - if ((imctx->features & RBD_FEATURE_DEEP_FLATTEN) == 0 && - !imctx->snaps.empty()) { - imctx->parent_lock.get_read(); - ParentInfo parent_info = imctx->parent_md; - imctx->parent_lock.put_read(); - - r = cls_client::remove_child(&imctx->md_ctx, RBD_CHILDREN, - parent_info.spec, imctx->id); - if (r < 0 && r != -ENOENT) { - lderr(cct) << "error removing child from children list" << dendl; - imctx->state->close(); - return r; - } - } - - r = imctx->state->close(); - if (r < 0) { - lderr(cct) << "failed to close image: " << cpp_strerror(r) << dendl; - return r; - } - } - pctx.update_progress(++i, size); - assert(i <= size); - } - - return 0; - } - - int list_children(ImageCtx *ictx, set >& names) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << "children list " << ictx->name << dendl; - - RWLock::RLocker l(ictx->snap_lock); - ParentSpec parent_spec(ictx->md_ctx.get_id(), ictx->id, ictx->snap_id); - map< pair, set > image_info; - - int r = api::Image<>::list_children(ictx, parent_spec, &image_info); - if (r < 0) { - return r; - } - - Rados rados(ictx->md_ctx); - for ( auto &info : image_info){ - IoCtx ioctx; - r = rados.ioctx_create2(info.first.first, ioctx); - if (r < 0) { - lderr(cct) << "Error accessing child image pool " << info.first.second - << dendl; - return r; - } - - for (auto &id_it : info.second) { - string name; - r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY, id_it, &name); - if (r < 0) { - lderr(cct) << "Error looking up name for image id " << id_it - << " in pool " << info.first.second << dendl; - return r; - } - names.insert(make_pair(info.first.second, name)); - } - } - - return 0; - } - - int get_snap_namespace(ImageCtx *ictx, - const char *snap_name, - cls::rbd::SnapshotNamespace *snap_namespace) { - ldout(ictx->cct, 20) << "get_snap_namespace " << ictx << " " << snap_name - << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - RWLock::RLocker l(ictx->snap_lock); - snap_t snap_id = ictx->get_snap_id(*snap_namespace, snap_name); - if (snap_id == CEPH_NOSNAP) - return -ENOENT; - r = ictx->get_snap_namespace(snap_id, snap_namespace); - return r; - } - - int snap_is_protected(ImageCtx *ictx, const char *snap_name, bool *is_protected) - { - ldout(ictx->cct, 20) << "snap_is_protected " << ictx << " " << snap_name - << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - RWLock::RLocker l(ictx->snap_lock); - snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name); - if (snap_id == CEPH_NOSNAP) - return -ENOENT; - bool is_unprotected; - r = ictx->is_snap_unprotected(snap_id, &is_unprotected); - // consider both PROTECTED or UNPROTECTING to be 'protected', - // since in either state they can't be deleted - *is_protected = !is_unprotected; - return r; - } - - int create_v1(IoCtx& io_ctx, const char *imgname, uint64_t size, int order) - { - CephContext *cct = (CephContext *)io_ctx.cct(); - - ldout(cct, 20) << __func__ << " " << &io_ctx << " name = " << imgname - << " size = " << size << " order = " << order << dendl; - int r = validate_pool(io_ctx, cct); - if (r < 0) { - return r; - } - - ldout(cct, 2) << "adding rbd image to directory..." << dendl; - r = tmap_set(io_ctx, imgname); - if (r < 0) { - lderr(cct) << "error adding image to directory: " << cpp_strerror(r) - << dendl; - return r; - } - - Rados rados(io_ctx); - uint64_t bid = rados.get_instance_id(); - - ldout(cct, 2) << "creating rbd image..." << dendl; - struct rbd_obj_header_ondisk header; - init_rbd_header(header, size, order, bid); - - bufferlist bl; - bl.append((const char *)&header, sizeof(header)); - - string header_oid = util::old_header_name(imgname); - r = io_ctx.write(header_oid, bl, bl.length(), 0); - if (r < 0) { - lderr(cct) << "Error writing image header: " << cpp_strerror(r) - << dendl; - int remove_r = tmap_rm(io_ctx, imgname); - if (remove_r < 0) { - lderr(cct) << "Could not remove image from directory after " - << "header creation failed: " - << cpp_strerror(remove_r) << dendl; - } - return r; - } - - ldout(cct, 2) << "done." << dendl; - return 0; - } - - int create(librados::IoCtx& io_ctx, const char *imgname, uint64_t size, - int *order) - { - uint64_t order_ = *order; - ImageOptions opts; - - int r = opts.set(RBD_IMAGE_OPTION_ORDER, order_); - assert(r == 0); - - r = create(io_ctx, imgname, "", size, opts, "", "", false); - - int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_); - assert(r1 == 0); - *order = order_; - - return r; - } - - int create(IoCtx& io_ctx, const char *imgname, uint64_t size, - bool old_format, uint64_t features, int *order, - uint64_t stripe_unit, uint64_t stripe_count) - { - if (!order) - return -EINVAL; - - uint64_t order_ = *order; - uint64_t format = old_format ? 1 : 2; - ImageOptions opts; - int r; - - r = opts.set(RBD_IMAGE_OPTION_FORMAT, format); - assert(r == 0); - r = opts.set(RBD_IMAGE_OPTION_FEATURES, features); - assert(r == 0); - r = opts.set(RBD_IMAGE_OPTION_ORDER, order_); - assert(r == 0); - r = opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); - assert(r == 0); - r = opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); - assert(r == 0); - - r = create(io_ctx, imgname, "", size, opts, "", "", false); - - int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_); - assert(r1 == 0); - *order = order_; - - return r; - } - - int create(IoCtx& io_ctx, const std::string &image_name, - const std::string &image_id, uint64_t size, - ImageOptions& opts, - const std::string &non_primary_global_image_id, - const std::string &primary_mirror_uuid, - bool skip_mirror_enable) - { - std::string id(image_id); - if (id.empty()) { - id = util::generate_image_id(io_ctx); - } - - CephContext *cct = (CephContext *)io_ctx.cct(); - ldout(cct, 10) << __func__ << " name=" << image_name << ", " - << "id= " << id << ", " - << "size=" << size << ", opts=" << opts << dendl; - - uint64_t format; - if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) - format = cct->_conf->get_val("rbd_default_format"); - bool old_format = format == 1; - - // make sure it doesn't already exist, in either format - int r = detect_format(io_ctx, image_name, NULL, NULL); - if (r != -ENOENT) { - if (r) { - lderr(cct) << "Could not tell if " << image_name << " already exists" - << dendl; - return r; - } - lderr(cct) << "rbd image " << image_name << " already exists" << dendl; - return -EEXIST; - } - - uint64_t order = 0; - if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0 || order == 0) { - order = cct->_conf->get_val("rbd_default_order"); - } - r = image::CreateRequest<>::validate_order(cct, order); - if (r < 0) { - return r; - } - - if (old_format) { - r = create_v1(io_ctx, image_name.c_str(), size, order); - } else { - ThreadPool *thread_pool; - ContextWQ *op_work_queue; - ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); - - C_SaferCond cond; - image::CreateRequest<> *req = image::CreateRequest<>::create( - io_ctx, image_name, id, size, opts, non_primary_global_image_id, - primary_mirror_uuid, skip_mirror_enable, op_work_queue, &cond); - req->send(); - - r = cond.wait(); - } - - int r1 = opts.set(RBD_IMAGE_OPTION_ORDER, order); - assert(r1 == 0); - - return r; - } - - /* - * Parent may be in different pool, hence different IoCtx - */ - int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name, - IoCtx& c_ioctx, const char *c_name, - uint64_t features, int *c_order, - uint64_t stripe_unit, int stripe_count) - { - uint64_t order = *c_order; - - ImageOptions opts; - opts.set(RBD_IMAGE_OPTION_FEATURES, features); - opts.set(RBD_IMAGE_OPTION_ORDER, order); - opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); - opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); - - int r = clone(p_ioctx, p_name, p_snap_name, c_ioctx, c_name, opts); - opts.get(RBD_IMAGE_OPTION_ORDER, &order); - *c_order = order; - return r; - } - - int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name, - IoCtx& c_ioctx, const char *c_name, ImageOptions& c_opts) - { - CephContext *cct = (CephContext *)p_ioctx.cct(); - if (p_snap_name == NULL) { - lderr(cct) << "image to be cloned must be a snapshot" << dendl; - return -EINVAL; - } - - // make sure parent snapshot exists - ImageCtx *p_imctx = new ImageCtx(p_name, "", p_snap_name, p_ioctx, true); - int r = p_imctx->state->open(false); - if (r < 0) { - lderr(cct) << "error opening parent image: " - << cpp_strerror(r) << dendl; - return r; - } - - r = clone(p_imctx, c_ioctx, c_name, "", c_opts, "", ""); - - int close_r = p_imctx->state->close(); - if (r == 0 && close_r < 0) { - r = close_r; - } - - if (r < 0) { - return r; - } - return 0; - } - - int clone(ImageCtx *p_imctx, IoCtx& c_ioctx, const std::string &c_name, - const std::string &c_id, ImageOptions& c_opts, - const std::string &non_primary_global_image_id, - const std::string &primary_mirror_uuid) - { - std::string id(c_id); - if (id.empty()) { - id = util::generate_image_id(c_ioctx); - } - - CephContext *cct = (CephContext *)c_ioctx.cct(); - ldout(cct, 10) << __func__ << " " - << "c_name=" << c_name << ", " - << "c_id= " << c_id << ", " - << "c_opts=" << c_opts << dendl; - - ThreadPool *thread_pool; - ContextWQ *op_work_queue; - ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); - - C_SaferCond cond; - auto *req = image::CloneRequest<>::create( - p_imctx, c_ioctx, c_name, id, c_opts, - non_primary_global_image_id, primary_mirror_uuid, op_work_queue, &cond); - req->send(); - - return cond.wait(); - } - - int rename(IoCtx& io_ctx, const char *srcname, const char *dstname) - { - CephContext *cct = (CephContext *)io_ctx.cct(); - ldout(cct, 20) << "rename " << &io_ctx << " " << srcname << " -> " - << dstname << dendl; - - ImageCtx *ictx = new ImageCtx(srcname, "", "", io_ctx, false); - int r = ictx->state->open(false); - if (r < 0) { - lderr(cct) << "error opening source image: " << cpp_strerror(r) << dendl; - return r; - } - BOOST_SCOPE_EXIT((ictx)) { - ictx->state->close(); - } BOOST_SCOPE_EXIT_END - - return ictx->operations->rename(dstname); - } - - int info(ImageCtx *ictx, image_info_t& info, size_t infosize) - { - ldout(ictx->cct, 20) << "info " << ictx << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - image_info(ictx, info, infosize); - return 0; - } - - int get_old_format(ImageCtx *ictx, uint8_t *old) - { - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - *old = ictx->old_format; - return 0; - } - - int get_size(ImageCtx *ictx, uint64_t *size) - { - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - RWLock::RLocker l2(ictx->snap_lock); - *size = ictx->get_image_size(ictx->snap_id); - return 0; - } - - int get_features(ImageCtx *ictx, uint64_t *features) - { - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - RWLock::RLocker l(ictx->snap_lock); - *features = ictx->features; - return 0; - } - - int get_overlap(ImageCtx *ictx, uint64_t *overlap) - { - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - RWLock::RLocker l(ictx->snap_lock); - RWLock::RLocker l2(ictx->parent_lock); - return ictx->get_parent_overlap(ictx->snap_id, overlap); - } - - int get_parent_info(ImageCtx *ictx, string *parent_pool_name, - string *parent_name, string *parent_id, - string *parent_snap_name) - { - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - RWLock::RLocker l(ictx->snap_lock); - RWLock::RLocker l2(ictx->parent_lock); - if (ictx->parent == NULL) { - return -ENOENT; - } - - ParentSpec parent_spec; - - if (ictx->snap_id == CEPH_NOSNAP) { - parent_spec = ictx->parent_md.spec; - } else { - r = ictx->get_parent_spec(ictx->snap_id, &parent_spec); - if (r < 0) { - lderr(ictx->cct) << "Can't find snapshot id = " << ictx->snap_id - << dendl; - return r; - } - if (parent_spec.pool_id == -1) - return -ENOENT; - } - if (parent_pool_name) { - Rados rados(ictx->md_ctx); - r = rados.pool_reverse_lookup(parent_spec.pool_id, - parent_pool_name); - if (r < 0) { - lderr(ictx->cct) << "error looking up pool name: " << cpp_strerror(r) - << dendl; - return r; - } - } - - if (parent_snap_name) { - RWLock::RLocker l(ictx->parent->snap_lock); - r = ictx->parent->get_snap_name(parent_spec.snap_id, - parent_snap_name); - if (r < 0) { - lderr(ictx->cct) << "error finding parent snap name: " - << cpp_strerror(r) << dendl; - return r; - } - } - - if (parent_name) { - RWLock::RLocker snap_locker(ictx->parent->snap_lock); - *parent_name = ictx->parent->name; - } - if (parent_id) { - *parent_id = ictx->parent->id; - } - - return 0; - } - - int get_flags(ImageCtx *ictx, uint64_t *flags) - { - int r = ictx->state->refresh_if_required(); - if (r < 0) { - return r; - } - - RWLock::RLocker l2(ictx->snap_lock); - return ictx->get_flags(ictx->snap_id, flags); - } - - int set_image_notification(ImageCtx *ictx, int fd, int type) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << __func__ << " " << ictx << " fd " << fd << " type" << type << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) { - return r; - } - - if (ictx->event_socket.is_valid()) - return -EINVAL; - return ictx->event_socket.init(fd, type); - } - - int is_exclusive_lock_owner(ImageCtx *ictx, bool *is_owner) - { - *is_owner = false; - - RWLock::RLocker owner_locker(ictx->owner_lock); - if (ictx->exclusive_lock == nullptr || - !ictx->exclusive_lock->is_lock_owner()) { - return 0; - } - - // might have been blacklisted by peer -- ensure we still own - // the lock by pinging the OSD - int r = ictx->exclusive_lock->assert_header_locked(); - if (r == -EBUSY || r == -ENOENT) { - return 0; - } else if (r < 0) { - return r; - } - - *is_owner = true; - return 0; - } - - int lock_acquire(ImageCtx *ictx, rbd_lock_mode_t lock_mode) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", " - << "lock_mode=" << lock_mode << dendl; - - if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) { - return -EOPNOTSUPP; - } - - C_SaferCond lock_ctx; - { - RWLock::WLocker l(ictx->owner_lock); - - if (ictx->exclusive_lock == nullptr) { - lderr(cct) << "exclusive-lock feature is not enabled" << dendl; - return -EINVAL; - } - - if (ictx->get_exclusive_lock_policy()->may_auto_request_lock()) { - ictx->set_exclusive_lock_policy( - new exclusive_lock::StandardPolicy(ictx)); - } - - if (ictx->exclusive_lock->is_lock_owner()) { - return 0; - } - - ictx->exclusive_lock->acquire_lock(&lock_ctx); - } - - int r = lock_ctx.wait(); - if (r < 0) { - lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r) - << dendl; - return r; - } - - RWLock::RLocker l(ictx->owner_lock); - - if (ictx->exclusive_lock == nullptr || - !ictx->exclusive_lock->is_lock_owner()) { - lderr(cct) << "failed to acquire exclusive lock" << dendl; - return -EROFS; - } - - return 0; - } - - int lock_release(ImageCtx *ictx) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl; - - C_SaferCond lock_ctx; - { - RWLock::WLocker l(ictx->owner_lock); - - if (ictx->exclusive_lock == nullptr || - !ictx->exclusive_lock->is_lock_owner()) { - lderr(cct) << "not exclusive lock owner" << dendl; - return -EINVAL; - } - - ictx->exclusive_lock->release_lock(&lock_ctx); - } - - int r = lock_ctx.wait(); - if (r < 0) { - lderr(cct) << "failed to release exclusive lock: " << cpp_strerror(r) - << dendl; - return r; - } - return 0; - } - - int lock_get_owners(ImageCtx *ictx, rbd_lock_mode_t *lock_mode, - std::list *lock_owners) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl; - - if (!ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) { - lderr(cct) << "exclusive-lock feature is not enabled" << dendl; - return -EINVAL; - } - - managed_lock::Locker locker; - C_SaferCond get_owner_ctx; - ExclusiveLock<>(*ictx).get_locker(&locker, &get_owner_ctx); - int r = get_owner_ctx.wait(); - if (r == -ENOENT) { - return r; - } else if (r < 0) { - lderr(cct) << "failed to determine current lock owner: " - << cpp_strerror(r) << dendl; - return r; - } - - *lock_mode = RBD_LOCK_MODE_EXCLUSIVE; - lock_owners->clear(); - lock_owners->emplace_back(locker.address); - return 0; - } - - int lock_break(ImageCtx *ictx, rbd_lock_mode_t lock_mode, - const std::string &lock_owner) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", " - << "lock_mode=" << lock_mode << ", " - << "lock_owner=" << lock_owner << dendl; - - if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) { - return -EOPNOTSUPP; - } - - if (ictx->read_only) { - return -EROFS; - } - - managed_lock::Locker locker; - C_SaferCond get_owner_ctx; - { - RWLock::RLocker l(ictx->owner_lock); - - if (ictx->exclusive_lock == nullptr) { - lderr(cct) << "exclusive-lock feature is not enabled" << dendl; - return -EINVAL; - } - - ictx->exclusive_lock->get_locker(&locker, &get_owner_ctx); - } - int r = get_owner_ctx.wait(); - if (r == -ENOENT) { - return r; - } else if (r < 0) { - lderr(cct) << "failed to determine current lock owner: " - << cpp_strerror(r) << dendl; - return r; - } - - if (locker.address != lock_owner) { - return -EBUSY; - } - - C_SaferCond break_ctx; - { - RWLock::RLocker l(ictx->owner_lock); - - if (ictx->exclusive_lock == nullptr) { - lderr(cct) << "exclusive-lock feature is not enabled" << dendl; - return -EINVAL; - } - - ictx->exclusive_lock->break_lock(locker, true, &break_ctx); - } - r = break_ctx.wait(); - if (r == -ENOENT) { - return r; - } else if (r < 0) { - lderr(cct) << "failed to break lock: " << cpp_strerror(r) << dendl; - return r; - } - return 0; - } - - int remove(IoCtx& io_ctx, const std::string &image_name, - const std::string &image_id, ProgressContext& prog_ctx, - bool force, bool from_trash_remove) - { - CephContext *cct((CephContext *)io_ctx.cct()); - ldout(cct, 20) << "remove " << &io_ctx << " " - << (image_id.empty() ? image_name : image_id) << dendl; - - ThreadPool *thread_pool; - ContextWQ *op_work_queue; - ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue); - - C_SaferCond cond; - auto req = librbd::image::RemoveRequest<>::create( - io_ctx, image_name, image_id, force, from_trash_remove, prog_ctx, - op_work_queue, &cond); - req->send(); - - return cond.wait(); - } - - int trash_move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source, - const std::string &image_name, uint64_t delay) { - CephContext *cct((CephContext *)io_ctx.cct()); - ldout(cct, 20) << "trash_move " << &io_ctx << " " << image_name - << dendl; - - std::string image_id; - ImageCtx *ictx = new ImageCtx(image_name, "", nullptr, io_ctx, false); - int r = ictx->state->open(true); - if (r < 0) { - ictx = nullptr; - - if (r != -ENOENT) { - ldout(cct, 2) << "error opening image: " << cpp_strerror(-r) << dendl; - return r; - } - - // try to get image id from the directory - r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, &image_id); - if (r < 0) { - if (r != -ENOENT) { - ldout(cct, 2) << "error reading image id from dirctory: " - << cpp_strerror(-r) << dendl; - } - return r; - } - } else { - if (ictx->old_format) { - ictx->state->close(); - return -EOPNOTSUPP; - } - - image_id = ictx->id; - ictx->owner_lock.get_read(); - if (ictx->exclusive_lock != nullptr) { - r = ictx->operations->prepare_image_update(); - if (r < 0 || (ictx->exclusive_lock != nullptr && - !ictx->exclusive_lock->is_lock_owner())) { - lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl; - ictx->owner_lock.put_read(); - ictx->state->close(); - return -EBUSY; - } - } - } - - BOOST_SCOPE_EXIT_ALL(ictx, cct) { - if (ictx == nullptr) - return; - - bool is_locked = ictx->exclusive_lock != nullptr && - ictx->exclusive_lock->is_lock_owner(); - if (is_locked) { - C_SaferCond ctx; - auto exclusive_lock = ictx->exclusive_lock; - exclusive_lock->shut_down(&ctx); - ictx->owner_lock.put_read(); - int r = ctx.wait(); - if (r < 0) { - lderr(cct) << "error shutting down exclusive lock" << dendl; - } - delete exclusive_lock; - } else { - ictx->owner_lock.put_read(); - } - ictx->state->close(); - }; - - ldout(cct, 2) << "adding image entry to rbd_trash" << dendl; - utime_t ts = ceph_clock_now(); - utime_t deferment_end_time = ts; - deferment_end_time += (double)delay; - cls::rbd::TrashImageSource trash_source = - static_cast(source); - cls::rbd::TrashImageSpec trash_spec(trash_source, image_name, ts, - deferment_end_time); - r = cls_client::trash_add(&io_ctx, image_id, trash_spec); - if (r < 0 && r != -EEXIST) { - lderr(cct) << "error adding image " << image_name << " to rbd_trash" - << dendl; - return r; - } else if (r == -EEXIST) { - ldout(cct, 10) << "found previous unfinished deferred remove for image:" - << image_id << dendl; - // continue with removing image from directory - } - - ldout(cct, 2) << "removing id object..." << dendl; - r = io_ctx.remove(util::id_obj_name(image_name)); - if (r < 0 && r != -ENOENT) { - lderr(cct) << "error removing id object: " << cpp_strerror(r) - << dendl; - return r; - } - - ldout(cct, 2) << "removing rbd image from v2 directory..." << dendl; - r = cls_client::dir_remove_image(&io_ctx, RBD_DIRECTORY, image_name, - image_id); - if (r < 0) { - if (r != -ENOENT) { - lderr(cct) << "error removing image from v2 directory: " - << cpp_strerror(-r) << dendl; - } - return r; - } - - return 0; - } - - int trash_get(IoCtx &io_ctx, const std::string &id, - trash_image_info_t *info) { - CephContext *cct((CephContext *)io_ctx.cct()); - ldout(cct, 20) << __func__ << " " << &io_ctx << dendl; - - cls::rbd::TrashImageSpec spec; - int r = cls_client::trash_get(&io_ctx, id, &spec); - if (r == -ENOENT) { - return r; - } else if (r < 0) { - lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r) - << dendl; - return r; - } - - rbd_trash_image_source_t source = static_cast( - spec.source); - *info = trash_image_info_t{id, spec.name, source, spec.deletion_time.sec(), - spec.deferment_end_time.sec()}; - return 0; - } - - int trash_list(IoCtx &io_ctx, vector &entries) { - CephContext *cct((CephContext *)io_ctx.cct()); - ldout(cct, 20) << "trash_list " << &io_ctx << dendl; - - bool more_entries; - uint32_t max_read = 1024; - std::string last_read = ""; - do { - map trash_entries; - int r = cls_client::trash_list(&io_ctx, last_read, max_read, - &trash_entries); - if (r < 0 && r != -ENOENT) { - lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r) - << dendl; - return r; - } else if (r == -ENOENT) { - break; - } - - if (trash_entries.empty()) { - break; - } - - for (const auto &entry : trash_entries) { - rbd_trash_image_source_t source = - static_cast(entry.second.source); - entries.push_back({entry.first, entry.second.name, source, - entry.second.deletion_time.sec(), - entry.second.deferment_end_time.sec()}); - } - last_read = trash_entries.rbegin()->first; - more_entries = (trash_entries.size() >= max_read); - } while (more_entries); - - return 0; - } - - int trash_remove(IoCtx &io_ctx, const std::string &image_id, bool force, - ProgressContext& prog_ctx) { - CephContext *cct((CephContext *)io_ctx.cct()); - ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id - << " " << force << dendl; - - cls::rbd::TrashImageSpec trash_spec; - int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec); - if (r < 0) { - lderr(cct) << "error getting image id " << image_id - << " info from trash: " << cpp_strerror(r) << dendl; - return r; - } - - utime_t now = ceph_clock_now(); - if (now < trash_spec.deferment_end_time && !force) { - lderr(cct) << "error: deferment time has not expired." << dendl; - return -EPERM; - } - - r = remove(io_ctx, "", image_id, prog_ctx, false, true); - if (r < 0) { - lderr(cct) << "error removing image " << image_id - << ", which is pending deletion" << dendl; - return r; - } - r = cls_client::trash_remove(&io_ctx, image_id); - if (r < 0 && r != -ENOENT) { - lderr(cct) << "error removing image " << image_id - << " from rbd_trash object" << dendl; - return r; - } - return 0; - } - - int trash_restore(librados::IoCtx &io_ctx, const std::string &image_id, - const std::string &image_new_name) { - CephContext *cct((CephContext *)io_ctx.cct()); - ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " " - << image_new_name << dendl; - - cls::rbd::TrashImageSpec trash_spec; - int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec); - if (r < 0) { - lderr(cct) << "error getting image id " << image_id - << " info from trash: " << cpp_strerror(r) << dendl; - return r; - } - - std::string image_name = image_new_name; - if (image_name.empty()) { - // if user didn't specify a new name, let's try using the old name - image_name = trash_spec.name; - ldout(cct, 20) << "restoring image id " << image_id << " with name " - << image_name << dendl; - } - - // check if no image exists with the same name - bool create_id_obj = true; - std::string existing_id; - r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id); - if (r < 0 && r != -ENOENT) { - lderr(cct) << "error checking if image " << image_name << " exists: " - << cpp_strerror(r) << dendl; - return r; - } else if (r != -ENOENT){ - // checking if we are recovering from an incomplete restore - if (existing_id != image_id) { - ldout(cct, 2) << "an image with the same name already exists" << dendl; - return -EEXIST; - } - create_id_obj = false; - } - - if (create_id_obj) { - ldout(cct, 2) << "adding id object" << dendl; - librados::ObjectWriteOperation op; - op.create(true); - cls_client::set_id(&op, image_id); - r = io_ctx.operate(util::id_obj_name(image_name), &op); - if (r < 0) { - lderr(cct) << "error adding id object for image " << image_name - << ": " << cpp_strerror(r) << dendl; - return r; - } - } - - ldout(cct, 2) << "adding rbd image from v2 directory..." << dendl; - r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name, - image_id); - if (r < 0 && r != -EEXIST) { - lderr(cct) << "error adding image to v2 directory: " - << cpp_strerror(r) << dendl; - return r; - } - - ldout(cct, 2) << "removing image from trash..." << dendl; - r = cls_client::trash_remove(&io_ctx, image_id); - if (r < 0 && r != -ENOENT) { - lderr(cct) << "error removing image id " << image_id << " from trash: " - << cpp_strerror(r) << dendl; - return r; - } - - return 0; - } - - int snap_list(ImageCtx *ictx, vector& snaps) - { - ldout(ictx->cct, 20) << "snap_list " << ictx << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - RWLock::RLocker l(ictx->snap_lock); - for (map::iterator it = ictx->snap_info.begin(); - it != ictx->snap_info.end(); ++it) { - snap_info_t info; - info.name = it->second.name; - info.id = it->first; - info.size = it->second.size; - snaps.push_back(info); - } - - return 0; - } - - int snap_exists(ImageCtx *ictx, const cls::rbd::SnapshotNamespace& snap_namespace, - const char *snap_name, bool *exists) - { - ldout(ictx->cct, 20) << "snap_exists " << ictx << " " << snap_name << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - RWLock::RLocker l(ictx->snap_lock); - *exists = ictx->get_snap_id(snap_namespace, snap_name) != CEPH_NOSNAP; - return 0; - } - - int snap_remove(ImageCtx *ictx, const char *snap_name, uint32_t flags, - ProgressContext& pctx) - { - ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_name << " flags: " << flags << dendl; - - int r = 0; - - r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - if (flags & RBD_SNAP_REMOVE_FLATTEN) { - r = flatten_children(ictx, snap_name, pctx); - if (r < 0) { - return r; - } - } - - bool is_protected; - r = snap_is_protected(ictx, snap_name, &is_protected); - if (r < 0) { - return r; - } - - if (is_protected && flags & RBD_SNAP_REMOVE_UNPROTECT) { - r = ictx->operations->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name); - if (r < 0) { - lderr(ictx->cct) << "failed to unprotect snapshot: " << snap_name << dendl; - return r; - } - - r = snap_is_protected(ictx, snap_name, &is_protected); - if (r < 0) { - return r; - } - if (is_protected) { - lderr(ictx->cct) << "snapshot is still protected after unprotection" << dendl; - ceph_abort(); - } - } - - C_SaferCond ctx; - ictx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name, &ctx); - - r = ctx.wait(); - return r; - } - - int snap_get_timestamp(ImageCtx *ictx, uint64_t snap_id, struct timespec *timestamp) - { - std::map::iterator snap_it = ictx->snap_info.find(snap_id); - assert(snap_it != ictx->snap_info.end()); - utime_t time = snap_it->second.timestamp; - time.to_timespec(timestamp); - return 0; - } - - int snap_get_limit(ImageCtx *ictx, uint64_t *limit) - { - int r = cls_client::snapshot_get_limit(&ictx->md_ctx, ictx->header_oid, - limit); - if (r == -EOPNOTSUPP) { - *limit = UINT64_MAX; - r = 0; - } - return r; - } - - int snap_set_limit(ImageCtx *ictx, uint64_t limit) - { - return ictx->operations->snap_set_limit(limit); - } - - struct CopyProgressCtx { - explicit CopyProgressCtx(ProgressContext &p) - : destictx(NULL), src_size(0), prog_ctx(p) - { } - - ImageCtx *destictx; - uint64_t src_size; - ProgressContext &prog_ctx; - }; - - int copy(ImageCtx *src, IoCtx& dest_md_ctx, const char *destname, - ImageOptions& opts, ProgressContext &prog_ctx, size_t sparse_size) - { - CephContext *cct = (CephContext *)dest_md_ctx.cct(); - ldout(cct, 20) << "copy " << src->name - << (src->snap_name.length() ? "@" + src->snap_name : "") - << " -> " << destname << " opts = " << opts << dendl; - - src->snap_lock.get_read(); - uint64_t features = src->features; - uint64_t src_size = src->get_image_size(src->snap_id); - src->snap_lock.put_read(); - uint64_t format = src->old_format ? 1 : 2; - if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) { - opts.set(RBD_IMAGE_OPTION_FORMAT, format); - } - uint64_t stripe_unit = src->stripe_unit; - if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) { - opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); - } - uint64_t stripe_count = src->stripe_count; - if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) { - opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); - } - uint64_t order = src->order; - if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) { - opts.set(RBD_IMAGE_OPTION_ORDER, order); - } - if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) { - opts.set(RBD_IMAGE_OPTION_FEATURES, features); - } - if (features & ~RBD_FEATURES_ALL) { - lderr(cct) << "librbd does not support requested features" << dendl; - return -ENOSYS; - } - - int r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false); - if (r < 0) { - lderr(cct) << "header creation failed" << dendl; - return r; - } - opts.set(RBD_IMAGE_OPTION_ORDER, static_cast(order)); - - ImageCtx *dest = new librbd::ImageCtx(destname, "", NULL, - dest_md_ctx, false); - r = dest->state->open(false); - if (r < 0) { - lderr(cct) << "failed to read newly created header" << dendl; - return r; - } - - r = copy(src, dest, prog_ctx, sparse_size); - - int close_r = dest->state->close(); - if (r == 0 && close_r < 0) { - r = close_r; - } - return r; - } - - class C_CopyWrite : public Context { - public: - C_CopyWrite(bufferlist *bl, Context* ctx) - : m_bl(bl), m_ctx(ctx) {} - void finish(int r) override { - delete m_bl; - m_ctx->complete(r); - } - private: - bufferlist *m_bl; - Context *m_ctx; - }; - - class C_CopyRead : public Context { - public: - C_CopyRead(SimpleThrottle *throttle, ImageCtx *dest, uint64_t offset, - bufferlist *bl, size_t sparse_size) - : m_throttle(throttle), m_dest(dest), m_offset(offset), m_bl(bl), - m_sparse_size(sparse_size) { - m_throttle->start_op(); - } - void finish(int r) override { - if (r < 0) { - lderr(m_dest->cct) << "error reading from source image at offset " - << m_offset << ": " << cpp_strerror(r) << dendl; - delete m_bl; - m_throttle->end_op(r); - return; - } - assert(m_bl->length() == (size_t)r); - - if (m_bl->is_zero()) { - delete m_bl; - m_throttle->end_op(r); - return; - } - - if (!m_sparse_size) { - m_sparse_size = (1 << m_dest->order); - } - - auto *throttle = m_throttle; - auto *end_op_ctx = new FunctionContext([throttle](int r) { - throttle->end_op(r); - }); - auto gather_ctx = new C_Gather(m_dest->cct, end_op_ctx); - - bufferptr m_ptr(m_bl->length()); - m_bl->rebuild(m_ptr); - size_t write_offset = 0; - size_t write_length = 0; - size_t offset = 0; - size_t length = m_bl->length(); - while (offset < length) { - if (util::calc_sparse_extent(m_ptr, - m_sparse_size, - length, - &write_offset, - &write_length, - &offset)) { - bufferptr write_ptr(m_ptr, write_offset, write_length); - bufferlist *write_bl = new bufferlist(); - write_bl->push_back(write_ptr); - Context *ctx = new C_CopyWrite(write_bl, gather_ctx->new_sub()); - auto comp = io::AioCompletion::create(ctx); - - // coordinate through AIO WQ to ensure lock is acquired if needed - m_dest->io_work_queue->aio_write(comp, m_offset + write_offset, - write_length, - std::move(*write_bl), - LIBRADOS_OP_FLAG_FADVISE_DONTNEED, - std::move(read_trace)); - write_offset = offset; - write_length = 0; - } - } - delete m_bl; - assert(gather_ctx->get_sub_created_count() > 0); - gather_ctx->activate(); - } - - ZTracer::Trace read_trace; - - private: - SimpleThrottle *m_throttle; - ImageCtx *m_dest; - uint64_t m_offset; - bufferlist *m_bl; - size_t m_sparse_size; - }; - - int copy(ImageCtx *src, ImageCtx *dest, ProgressContext &prog_ctx, size_t sparse_size) - { - src->snap_lock.get_read(); - uint64_t src_size = src->get_image_size(src->snap_id); - src->snap_lock.put_read(); - - dest->snap_lock.get_read(); - uint64_t dest_size = dest->get_image_size(dest->snap_id); - dest->snap_lock.put_read(); - - CephContext *cct = src->cct; - if (dest_size < src_size) { - lderr(cct) << " src size " << src_size << " > dest size " - << dest_size << dendl; - return -EINVAL; - } - int r; - map pairs; - - r = cls_client::metadata_list(&src->md_ctx, src->header_oid, "", 0, &pairs); - if (r < 0 && r != -EOPNOTSUPP && r != -EIO) { - lderr(cct) << "couldn't list metadata: " << cpp_strerror(r) << dendl; - return r; - } else if (r == 0 && !pairs.empty()) { - r = cls_client::metadata_set(&dest->md_ctx, dest->header_oid, pairs); - if (r < 0) { - lderr(cct) << "couldn't set metadata: " << cpp_strerror(r) << dendl; - return r; - } - } - - ZTracer::Trace trace; - if (src->blkin_trace_all) { - trace.init("copy", &src->trace_endpoint); - } - - RWLock::RLocker owner_lock(src->owner_lock); - SimpleThrottle throttle(src->concurrent_management_ops, false); - uint64_t period = src->get_stripe_period(); - unsigned fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL | - LIBRADOS_OP_FLAG_FADVISE_NOCACHE; - for (uint64_t offset = 0; offset < src_size; offset += period) { - if (throttle.pending_error()) { - return throttle.wait_for_ret(); - } - - uint64_t len = min(period, src_size - offset); - bufferlist *bl = new bufferlist(); - auto ctx = new C_CopyRead(&throttle, dest, offset, bl, sparse_size); - auto comp = io::AioCompletion::create_and_start( - ctx, src, io::AIO_TYPE_READ); - - io::ImageReadRequest<> req(*src, comp, {{offset, len}}, - io::ReadResult{bl}, fadvise_flags, - std::move(trace)); - ctx->read_trace = req.get_trace(); - - req.send(); - prog_ctx.update_progress(offset, src_size); - } - - r = throttle.wait_for_ret(); - if (r >= 0) - prog_ctx.update_progress(src_size, src_size); - return r; - } - - int snap_set(ImageCtx *ictx, const cls::rbd::SnapshotNamespace &snap_namespace, - const char *snap_name) - { - ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = " - << (snap_name ? snap_name : "NULL") << dendl; - - // ignore return value, since we may be set to a non-existent - // snapshot and the user is trying to fix that - ictx->state->refresh_if_required(); - - C_SaferCond ctx; - std::string name(snap_name == nullptr ? "" : snap_name); - ictx->state->snap_set(snap_namespace, name, &ctx); - - int r = ctx.wait(); - if (r < 0) { - if (r != -ENOENT) { - lderr(ictx->cct) << "failed to " << (name.empty() ? "un" : "") << "set " - << "snapshot: " << cpp_strerror(r) << dendl; - } - return r; - } - - return 0; - } - - int list_lockers(ImageCtx *ictx, - std::list *lockers, - bool *exclusive, - string *tag) - { - ldout(ictx->cct, 20) << "list_locks on image " << ictx << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - RWLock::RLocker locker(ictx->md_lock); - if (exclusive) - *exclusive = ictx->exclusive_locked; - if (tag) - *tag = ictx->lock_tag; - if (lockers) { - lockers->clear(); - map::const_iterator it; - for (it = ictx->lockers.begin(); it != ictx->lockers.end(); ++it) { - locker_t locker; - locker.client = stringify(it->first.locker); - locker.cookie = it->first.cookie; - locker.address = stringify(it->second.addr); - lockers->push_back(locker); - } - } - - return 0; - } - - int lock(ImageCtx *ictx, bool exclusive, const string& cookie, - const string& tag) - { - ldout(ictx->cct, 20) << "lock image " << ictx << " exclusive=" << exclusive - << " cookie='" << cookie << "' tag='" << tag << "'" - << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - /** - * If we wanted we could do something more intelligent, like local - * checks that we think we will succeed. But for now, let's not - * duplicate that code. - */ - { - RWLock::RLocker locker(ictx->md_lock); - r = rados::cls::lock::lock(&ictx->md_ctx, ictx->header_oid, RBD_LOCK_NAME, - exclusive ? LOCK_EXCLUSIVE : LOCK_SHARED, - cookie, tag, "", utime_t(), 0); - if (r < 0) { - return r; - } - } - - ictx->notify_update(); - return 0; - } - - int unlock(ImageCtx *ictx, const string& cookie) - { - ldout(ictx->cct, 20) << "unlock image " << ictx - << " cookie='" << cookie << "'" << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - { - RWLock::RLocker locker(ictx->md_lock); - r = rados::cls::lock::unlock(&ictx->md_ctx, ictx->header_oid, - RBD_LOCK_NAME, cookie); - if (r < 0) { - return r; - } - } - - ictx->notify_update(); - return 0; - } - - int break_lock(ImageCtx *ictx, const string& client, - const string& cookie) - { - ldout(ictx->cct, 20) << "break_lock image " << ictx << " client='" << client - << "' cookie='" << cookie << "'" << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - entity_name_t lock_client; - if (!lock_client.parse(client)) { - lderr(ictx->cct) << "Unable to parse client '" << client - << "'" << dendl; - return -EINVAL; - } - - if (ictx->blacklist_on_break_lock) { - typedef std::map Lockers; - Lockers lockers; - ClsLockType lock_type; - std::string lock_tag; - r = rados::cls::lock::get_lock_info(&ictx->md_ctx, ictx->header_oid, - RBD_LOCK_NAME, &lockers, &lock_type, - &lock_tag); - if (r < 0) { - lderr(ictx->cct) << "unable to retrieve lock info: " << cpp_strerror(r) - << dendl; - return r; - } - - std::string client_address; - for (Lockers::iterator it = lockers.begin(); - it != lockers.end(); ++it) { - if (it->first.locker == lock_client) { - client_address = stringify(it->second.addr); - break; - } - } - if (client_address.empty()) { - return -ENOENT; - } - - RWLock::RLocker locker(ictx->md_lock); - librados::Rados rados(ictx->md_ctx); - r = rados.blacklist_add(client_address, - ictx->blacklist_expire_seconds); - if (r < 0) { - lderr(ictx->cct) << "unable to blacklist client: " << cpp_strerror(r) - << dendl; - return r; - } - } - - r = rados::cls::lock::break_lock(&ictx->md_ctx, ictx->header_oid, - RBD_LOCK_NAME, cookie, lock_client); - if (r < 0) - return r; - ictx->notify_update(); - return 0; - } - - void rbd_ctx_cb(completion_t cb, void *arg) - { - Context *ctx = reinterpret_cast(arg); - auto comp = reinterpret_cast(cb); - ctx->complete(comp->get_return_value()); - comp->release(); - } - - int64_t read_iterate(ImageCtx *ictx, uint64_t off, uint64_t len, - int (*cb)(uint64_t, size_t, const char *, void *), - void *arg) - { - utime_t start_time, elapsed; - - ldout(ictx->cct, 20) << "read_iterate " << ictx << " off = " << off - << " len = " << len << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) - return r; - - uint64_t mylen = len; - ictx->snap_lock.get_read(); - r = clip_io(ictx, off, &mylen); - ictx->snap_lock.put_read(); - if (r < 0) - return r; - - int64_t total_read = 0; - uint64_t period = ictx->get_stripe_period(); - uint64_t left = mylen; - - ZTracer::Trace trace; - if (ictx->blkin_trace_all) { - trace.init("read_iterate", &ictx->trace_endpoint); - } - - RWLock::RLocker owner_locker(ictx->owner_lock); - start_time = ceph_clock_now(); - while (left > 0) { - uint64_t period_off = off - (off % period); - uint64_t read_len = min(period_off + period - off, left); - - bufferlist bl; - - C_SaferCond ctx; - auto c = io::AioCompletion::create_and_start(&ctx, ictx, - io::AIO_TYPE_READ); - io::ImageRequest<>::aio_read(ictx, c, {{off, read_len}}, - io::ReadResult{&bl}, 0, std::move(trace)); - - int ret = ctx.wait(); - if (ret < 0) { - return ret; - } - - r = cb(total_read, ret, bl.c_str(), arg); - if (r < 0) { - return r; - } - - total_read += ret; - left -= ret; - off += ret; - } - - elapsed = ceph_clock_now() - start_time; - ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); - ictx->perfcounter->inc(l_librbd_rd); - ictx->perfcounter->inc(l_librbd_rd_bytes, mylen); - return total_read; - } - - // validate extent against image size; clip to image size if necessary - int clip_io(ImageCtx *ictx, uint64_t off, uint64_t *len) - { - assert(ictx->snap_lock.is_locked()); - uint64_t image_size = ictx->get_image_size(ictx->snap_id); - bool snap_exists = ictx->snap_exists; - - if (!snap_exists) - return -ENOENT; - - // special-case "len == 0" requests: always valid - if (*len == 0) - return 0; - - // can't start past end - if (off >= image_size) - return -EINVAL; - - // clip requests that extend past end to just end - if ((off + *len) > image_size) - *len = (size_t)(image_size - off); - - return 0; - } - - int flush(ImageCtx *ictx) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << "flush " << ictx << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) { - return r; - } - - ictx->user_flushed(); - C_SaferCond ctx; - { - RWLock::RLocker owner_locker(ictx->owner_lock); - ictx->flush(&ctx); - } - r = ctx.wait(); - - ictx->perfcounter->inc(l_librbd_flush); - return r; - } - - int invalidate_cache(ImageCtx *ictx) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << "invalidate_cache " << ictx << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) { - return r; - } - - RWLock::RLocker owner_locker(ictx->owner_lock); - RWLock::WLocker md_locker(ictx->md_lock); - r = ictx->invalidate_cache(false); - ictx->perfcounter->inc(l_librbd_invalidate_cache); - return r; - } - - int poll_io_events(ImageCtx *ictx, io::AioCompletion **comps, int numcomp) - { - if (numcomp <= 0) - return -EINVAL; - CephContext *cct = ictx->cct; - ldout(cct, 20) << __func__ << " " << ictx << " numcomp = " << numcomp - << dendl; - int i = 0; - Mutex::Locker l(ictx->completed_reqs_lock); - while (i < numcomp) { - if (ictx->completed_reqs.empty()) - break; - comps[i++] = ictx->completed_reqs.front(); - ictx->completed_reqs.pop_front(); - } - return i; - } - - int metadata_get(ImageCtx *ictx, const string &key, string *value) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << "metadata_get " << ictx << " key=" << key << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) { - return r; - } - - return cls_client::metadata_get(&ictx->md_ctx, ictx->header_oid, key, value); - } - - int metadata_list(ImageCtx *ictx, const string &start, uint64_t max, map *pairs) - { - CephContext *cct = ictx->cct; - ldout(cct, 20) << "metadata_list " << ictx << dendl; - - int r = ictx->state->refresh_if_required(); - if (r < 0) { - return r; - } - - return cls_client::metadata_list(&ictx->md_ctx, ictx->header_oid, start, max, pairs); - } - - struct C_RBD_Readahead : public Context { - ImageCtx *ictx; - object_t oid; - uint64_t offset; - uint64_t length; - C_RBD_Readahead(ImageCtx *ictx, object_t oid, uint64_t offset, uint64_t length) - : ictx(ictx), oid(oid), offset(offset), length(length) { } - void finish(int r) override { - ldout(ictx->cct, 20) << "C_RBD_Readahead on " << oid << ": " << offset << "+" << length << dendl; - ictx->readahead.dec_pending(); - } - }; - - void readahead(ImageCtx *ictx, - const vector >& image_extents) - { - uint64_t total_bytes = 0; - for (vector >::const_iterator p = image_extents.begin(); - p != image_extents.end(); - ++p) { - total_bytes += p->second; - } - - ictx->md_lock.get_write(); - bool abort = ictx->readahead_disable_after_bytes != 0 && - ictx->total_bytes_read > ictx->readahead_disable_after_bytes; - if (abort) { - ictx->md_lock.put_write(); - return; - } - ictx->total_bytes_read += total_bytes; - ictx->snap_lock.get_read(); - uint64_t image_size = ictx->get_image_size(ictx->snap_id); - ictx->snap_lock.put_read(); - ictx->md_lock.put_write(); - - pair readahead_extent = ictx->readahead.update(image_extents, image_size); - uint64_t readahead_offset = readahead_extent.first; - uint64_t readahead_length = readahead_extent.second; - - if (readahead_length > 0) { - ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" << readahead_length << dendl; - map > readahead_object_extents; - Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout, - readahead_offset, readahead_length, 0, readahead_object_extents); - for (map >::iterator p = readahead_object_extents.begin(); p != readahead_object_extents.end(); ++p) { - for (vector::iterator q = p->second.begin(); q != p->second.end(); ++q) { - ldout(ictx->cct, 20) << "(readahead) oid " << q->oid << " " << q->offset << "~" << q->length << dendl; - - Context *req_comp = new C_RBD_Readahead(ictx, q->oid, q->offset, q->length); - ictx->readahead.inc_pending(); - ictx->aio_read_from_cache(q->oid, q->objectno, NULL, - q->length, q->offset, - req_comp, 0, nullptr); - } - } - ictx->perfcounter->inc(l_librbd_readahead); - ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length); - } - } - - - -}