X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Flibradosstriper%2FRadosStriperImpl.cc;fp=src%2Fceph%2Fsrc%2Flibradosstriper%2FRadosStriperImpl.cc;h=0000000000000000000000000000000000000000;hb=7da45d65be36d36b880cc55c5036e96c24b53f00;hp=8d34ce51ecd5633aad6e1b02bb2a759a7e9f6c95;hpb=691462d09d0987b47e112d6ee8740375df3c51b2;p=stor4nfv.git diff --git a/src/ceph/src/libradosstriper/RadosStriperImpl.cc b/src/ceph/src/libradosstriper/RadosStriperImpl.cc deleted file mode 100644 index 8d34ce5..0000000 --- a/src/ceph/src/libradosstriper/RadosStriperImpl.cc +++ /dev/null @@ -1,1651 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 Sebastien Ponce - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#include - -#include "libradosstriper/RadosStriperImpl.h" - -#include - -#include -#include -#include - -#include "include/types.h" -#include "include/uuid.h" -#include "include/ceph_fs.h" -#include "common/dout.h" -#include "common/strtol.h" -#include "osdc/Striper.h" -#include "librados/AioCompletionImpl.h" -#include - -/* - * This file contents the actual implementation of the rados striped objects interface. - * - * Striped objects are stored in rados in a set of regular rados objects, after their - * content has been striped using the osdc/Striper interface. - * - * The external attributes of the striped object are mapped to the attributes of the - * first underlying object. This first object has a set of extra external attributes - * storing the layout of the striped object for future read back. These attributes are : - * - striper.layout.object_size : the size of rados objects used. 
- * Must be a multiple of striper.layout.stripe_unit - * - striper.layout.stripe_unit : the size of a stripe unit - * - striper.layout.stripe_count : the number of stripes used - * - striper.size : total striped object size - * - * In general, operations on striped objects are not atomic. - * However, a certain number of safety guards have been put in place to make the interface closer - * to atomicity : - * - each data operation takes a shared lock on the first rados object for the - * whole time of the operation - * - the remove and trunc operations take an exclusive lock on the first rados object - * for the whole time of the operation - * This makes sure that no removal/truncation of a striped object occurs while - * data operations are happening and vice versa. It thus makes sure that the layout - * of a striped object does not change during data operations, which is essential for - * data consistency. - * - * Still, writing to a striped object is not atomic. This means in particular that - * the size of an object may not be in sync with its content at all times. - * As the size is always guaranteed to be updated first and in an atomic way, and as - * sparse striped objects are supported (see below), what will typically happen is - * that a reader that comes too soon after a write will read 0s instead of the actual - * data. - * - * Note that remove handles the pieces of the striped object in reverse order, - * so that the head object is removed last, making the completion of the deletion atomic. - * - * Striped objects can be sparse, typically in case data was written at the end of the - * striped object only. In such a case, some rados objects constituting the striped object - * may be missing. Others can be partial (only the beginning will have data). - * When dealing with such sparse striped files, missing objects are detected and - * considered as full of 0s. They are however not created until real data is written - * to them. 
- * - * There are a number of missing features/improvements that could be implemented. - * Here are some ideas : - * - implementation of missing entry points (compared to rados) - * In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ... - * - */ - -#define dout_subsys ceph_subsys_rados -#undef dout_prefix -#define dout_prefix *_dout << "libradosstriper: " - -/// size of xattr buffer -#define XATTR_BUFFER_SIZE 32 - -/// names of the different xattr entries -#define XATTR_LAYOUT_STRIPE_UNIT "striper.layout.stripe_unit" -#define XATTR_LAYOUT_STRIPE_COUNT "striper.layout.stripe_count" -#define XATTR_LAYOUT_OBJECT_SIZE "striper.layout.object_size" -#define XATTR_SIZE "striper.size" -#define LOCK_PREFIX "lock." - -/// name of the lock used on objects to ensure layout stability during IO -#define RADOS_LOCK_NAME "striper.lock" - -/// format of the extension of rados objects created for a given striped object -#define RADOS_OBJECT_EXTENSION_FORMAT ".%016llx" - -/// default object layout -struct ceph_file_layout default_file_layout = { - init_le32(1<<22), // fl_stripe_unit - init_le32(1), // fl_stripe_count - init_le32(1<<22), // fl_object_size - init_le32(0), // fl_cas_hash - init_le32(0), // fl_object_stripe_unit - init_le32(-1), // fl_unused - init_le32(-1), // fl_pg_pool -}; - -using libradosstriper::MultiAioCompletionImplPtr; - -namespace { - -///////////////////////// CompletionData ///////////////////////////// - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous operations - */ -struct CompletionData : RefCountedObject { - /// constructor - CompletionData(libradosstriper::RadosStriperImpl * striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion = 0, - int n = 1); - /// destructor - ~CompletionData() override; - /// complete method - void complete(int r); - /// striper to be used to handle the write completion - 
libradosstriper::RadosStriperImpl *m_striper; - /// striped object concerned by the write operation - std::string m_soid; - /// shared lock to be released at completion - std::string m_lockCookie; - /// completion handler - librados::IoCtxImpl::C_aio_Complete *m_ack; -}; - -CompletionData::CompletionData -(libradosstriper::RadosStriperImpl* striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion, - int n) : - RefCountedObject(striper->cct(), n), - m_striper(striper), m_soid(soid), m_lockCookie(lockCookie), m_ack(0) { - m_striper->get(); - if (userCompletion) { - m_ack = new librados::IoCtxImpl::C_aio_Complete(userCompletion); - userCompletion->io = striper->m_ioCtxImpl; - } -} - -CompletionData::~CompletionData() { - if (m_ack) delete m_ack; - m_striper->put(); -} - -void CompletionData::complete(int r) { - if (m_ack) m_ack->finish(r); -} - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous read operations - */ -struct ReadCompletionData : CompletionData { - /// bufferlist containing final result - bufferlist* m_bl; - /// extents that will be read - std::vector* m_extents; - /// intermediate results - std::vector* m_resultbl; - /// return code of read completion, to be remembered until unlocking happened - int m_readRc; - /// completion object for the unlocking of the striped object at the end of the read - librados::AioCompletion *m_unlockCompletion; - /// constructor - ReadCompletionData(libradosstriper::RadosStriperImpl * striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion, - bufferlist* bl, - std::vector* extents, - std::vector* resultbl, - int n); - /// destructor - ~ReadCompletionData() override; - /// complete method for when reading is over - void complete_read(int r); - /// complete method for when object is unlocked - void complete_unlock(int r); -}; - -ReadCompletionData::ReadCompletionData 
-(libradosstriper::RadosStriperImpl* striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion, - bufferlist* bl, - std::vector* extents, - std::vector* resultbl, - int n) : - CompletionData(striper, soid, lockCookie, userCompletion, n), - m_bl(bl), m_extents(extents), m_resultbl(resultbl), m_readRc(0), - m_unlockCompletion(0) {} - -ReadCompletionData::~ReadCompletionData() { - m_unlockCompletion->release(); - delete m_extents; - delete m_resultbl; -} - -void ReadCompletionData::complete_read(int r) { - // gather data into final buffer - Striper::StripedReadResult readResult; - vector::iterator bit = m_resultbl->begin(); - for (vector::iterator eit = m_extents->begin(); - eit != m_extents->end(); - ++eit, ++bit) { - readResult.add_partial_result(m_striper->cct(), *bit, eit->buffer_extents); - } - m_bl->clear(); - readResult.assemble_result(m_striper->cct(), *m_bl, true); - // Remember return code - m_readRc = r; -} - -void ReadCompletionData::complete_unlock(int r) { - // call parent's completion method - // Note that we ignore the return code of the unlock as we cannot do much about it - CompletionData::complete(m_readRc?m_readRc:m_bl->length()); -} - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous write operations - */ -struct WriteCompletionData : CompletionData { - /// safe completion handler - librados::IoCtxImpl::C_aio_Complete *m_safe; - /// return code of write completion, to be remembered until unlocking happened - int m_writeRc; - /// completion object for the unlocking of the striped object at the end of the write - librados::AioCompletion *m_unlockCompletion; - /// constructor - WriteCompletionData(libradosstriper::RadosStriperImpl * striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion, - int n); - /// destructor - ~WriteCompletionData() override; - /// complete method for when writing is 
over - void complete_write(int r); - /// complete method for when object is unlocked - void complete_unlock(int r); - /// safe method - void safe(int r); -}; - -/** - * Builds the write completion data. - * When a user completion is given, a second handler (m_safe) is created on it - * and is fired by safe(). - */ -WriteCompletionData::WriteCompletionData -(libradosstriper::RadosStriperImpl* striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion, - int n) : - CompletionData(striper, soid, lockCookie, userCompletion, n), - // initializers are listed in member declaration order - m_safe(0), m_writeRc(0), m_unlockCompletion(0) { - if (userCompletion) { - m_safe = new librados::IoCtxImpl::C_aio_Complete(userCompletion); - } -} - -WriteCompletionData::~WriteCompletionData() { - // m_unlockCompletion is not set by the constructor, so it may still be null here - if (m_unlockCompletion) m_unlockCompletion->release(); - if (m_safe) delete m_safe; -} - -void WriteCompletionData::complete_unlock(int r) { - // call parent's completion method - // Note that we ignore the return code of the unlock as we cannot do much about it - CompletionData::complete(m_writeRc); -} - -void WriteCompletionData::complete_write(int r) { - // Remember return code - m_writeRc = r; -} - -void WriteCompletionData::safe(int r) { - if (m_safe) m_safe->finish(r); -} - -struct RemoveCompletionData : CompletionData { - /// removal flags - int flags; - /** - * constructor - * note that the constructed object will take ownership of the lock - */ - RemoveCompletionData(libradosstriper::RadosStriperImpl * striper, - const std::string& soid, - const std::string& lockCookie, - librados::AioCompletionImpl *userCompletion, - int flags = 0) : - CompletionData(striper, soid, lockCookie, userCompletion), flags(flags) {} -}; - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous truncate operations - */ -struct TruncateCompletionData : RefCountedObject { - /// constructor - TruncateCompletionData(libradosstriper::RadosStriperImpl* striper, - const std::string& soid, - uint64_t size) : - RefCountedObject(striper->cct()), - m_striper(striper), m_soid(soid), m_size(size) { - m_striper->get(); - } - /// destructor - 
~TruncateCompletionData() override { - m_striper->put(); - } - /// striper to be used - libradosstriper::RadosStriperImpl *m_striper; - /// striped object concerned by the truncate operation - std::string m_soid; - /// the final size of the truncated object - uint64_t m_size; -}; - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous read operations of a Rados File - */ -struct RadosReadCompletionData : RefCountedObject { - /// constructor - RadosReadCompletionData(MultiAioCompletionImplPtr multiAioCompl, - uint64_t expectedBytes, - bufferlist *bl, - CephContext *context, - int n = 1) : - RefCountedObject(context, n), - m_multiAioCompl(multiAioCompl), m_expectedBytes(expectedBytes), m_bl(bl) {} - /// the multi asynch io completion object to be used - MultiAioCompletionImplPtr m_multiAioCompl; - /// the expected number of bytes - uint64_t m_expectedBytes; - /// the bufferlist object where data have been written - bufferlist *m_bl; -}; - -/** - * struct handling (most of) the data needed to pass to the call back - * function in asynchronous stat operations. 
- * Inherited by the actual type for adding time information in different - * versions (time_t or struct timespec) - */ -struct BasicStatCompletionData : CompletionData { - /// constructor : an empty lock cookie is given to CompletionData - BasicStatCompletionData(libradosstriper::RadosStriperImpl* striper, - const std::string& soid, - librados::AioCompletionImpl *userCompletion, - libradosstriper::MultiAioCompletionImpl *multiCompletion, - uint64_t *psize, - int n = 1) : - CompletionData(striper, soid, "", userCompletion, n), - m_multiCompletion(multiCompletion), m_psize(psize), - m_statRC(0), m_getxattrRC(0) {}; - // MultiAioCompletionImpl used to handle the double async - // call in the back (stat + getxattr) - libradosstriper::MultiAioCompletionImpl *m_multiCompletion; - // where to store the size of first object - // this will be ignored but we need a place to store it when - // async stat is called (not initialized by the constructor) - uint64_t m_objectSize; - // where to store the file size - uint64_t *m_psize; - /// the bufferlist object used for the getxattr call - bufferlist m_bl; - /// return code of the stat - int m_statRC; - /// return code of the getxattr - int m_getxattrRC; -}; - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous stat operations. - * Simple templated extension of BasicStatCompletionData. 
- * The template parameter is the type of the time information - * (used with time_t for stat and struct timespec for stat2) - */ -template -struct StatCompletionData : BasicStatCompletionData { - /// constructor - StatCompletionData(libradosstriper::RadosStriperImpl* striper, - const std::string& soid, - librados::AioCompletionImpl *userCompletion, - libradosstriper::MultiAioCompletionImpl *multiCompletion, - uint64_t *psize, - TimeType *pmtime, - int n = 1) : - BasicStatCompletionData(striper, soid, userCompletion, multiCompletion, psize, n), - m_pmtime(pmtime) {}; - // where to store the file time - TimeType *m_pmtime; -}; - -/** - * struct handling the data needed to pass to the call back - * function in asynchronous remove operations of a Rados File - */ -struct RadosRemoveCompletionData : RefCountedObject { - /// constructor - RadosRemoveCompletionData(MultiAioCompletionImplPtr multiAioCompl, - CephContext *context) : - RefCountedObject(context, 2), - m_multiAioCompl(multiAioCompl) {}; - /// the multi asynch io completion object to be used - MultiAioCompletionImplPtr m_multiAioCompl; -}; - - -} // namespace { - -///////////////////////// constructor ///////////////////////////// - -libradosstriper::RadosStriperImpl::RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl) : - m_refCnt(0),lock("RadosStriper Refcont", false, false), m_radosCluster(ioctx), m_ioCtx(ioctx), m_ioCtxImpl(ioctx_impl), - m_layout(default_file_layout) {} - -///////////////////////// layout ///////////////////////////// - -int libradosstriper::RadosStriperImpl::setObjectLayoutStripeUnit -(unsigned int stripe_unit) -{ - /* stripe unit must be non-zero, 64k increment */ - if (!stripe_unit || (stripe_unit & (CEPH_MIN_STRIPE_UNIT-1))) - return -EINVAL; - m_layout.fl_stripe_unit = stripe_unit; - return 0; -} - -int libradosstriper::RadosStriperImpl::setObjectLayoutStripeCount -(unsigned int stripe_count) -{ - /* stripe count must be non-zero */ - if (!stripe_count) - return 
-EINVAL; - m_layout.fl_stripe_count = stripe_count; - return 0; -} - -int libradosstriper::RadosStriperImpl::setObjectLayoutObjectSize -(unsigned int object_size) -{ - /* object size must be non-zero, 64k increment */ - if (!object_size || (object_size & (CEPH_MIN_STRIPE_UNIT-1))) - return -EINVAL; - /* object size must be a multiple of stripe unit */ - if (object_size < m_layout.fl_stripe_unit || - object_size % m_layout.fl_stripe_unit) - return -EINVAL; - m_layout.fl_object_size = object_size; - return 0; -} - -///////////////////////// xattrs ///////////////////////////// - -int libradosstriper::RadosStriperImpl::getxattr(const object_t& soid, - const char *name, - bufferlist& bl) -{ - std::string firstObjOid = getObjectId(soid, 0); - return m_ioCtx.getxattr(firstObjOid, name, bl); -} - -int libradosstriper::RadosStriperImpl::setxattr(const object_t& soid, - const char *name, - bufferlist& bl) -{ - std::string firstObjOid = getObjectId(soid, 0); - return m_ioCtx.setxattr(firstObjOid, name, bl); -} - -int libradosstriper::RadosStriperImpl::getxattrs(const object_t& soid, - map& attrset) -{ - std::string firstObjOid = getObjectId(soid, 0); - int rc = m_ioCtx.getxattrs(firstObjOid, attrset); - if (rc) return rc; - // cleanup internal attributes dedicated to striping and locking - attrset.erase(XATTR_LAYOUT_STRIPE_UNIT); - attrset.erase(XATTR_LAYOUT_STRIPE_COUNT); - attrset.erase(XATTR_LAYOUT_OBJECT_SIZE); - attrset.erase(XATTR_SIZE); - attrset.erase(std::string(LOCK_PREFIX) + RADOS_LOCK_NAME); - return rc; -} - -int libradosstriper::RadosStriperImpl::rmxattr(const object_t& soid, - const char *name) -{ - std::string firstObjOid = getObjectId(soid, 0); - return m_ioCtx.rmxattr(firstObjOid, name); -} - -///////////////////////// io ///////////////////////////// - -int libradosstriper::RadosStriperImpl::write(const std::string& soid, - const bufferlist& bl, - size_t len, - uint64_t off) -{ - // open the object. 
This will create it if needed, retrieve its layout - // and size and take a shared lock on it - ceph_file_layout layout; - std::string lockCookie; - int rc = createAndOpenStripedObject(soid, &layout, len+off, &lockCookie, true); - if (rc) return rc; - return write_in_open_object(soid, layout, lockCookie, bl, len, off); -} - -int libradosstriper::RadosStriperImpl::append(const std::string& soid, - const bufferlist& bl, - size_t len) -{ - // open the object. This will create it if needed, retrieve its layout - // and size and take a shared lock on it - ceph_file_layout layout; - uint64_t size = len; - std::string lockCookie; - int rc = openStripedObjectForWrite(soid, &layout, &size, &lockCookie, false); - if (rc) return rc; - return write_in_open_object(soid, layout, lockCookie, bl, len, size); -} - -int libradosstriper::RadosStriperImpl::write_full(const std::string& soid, - const bufferlist& bl) -{ - int rc = trunc(soid, 0); - if (rc && rc != -ENOENT) return rc; // ENOENT is obviously ok - return write(soid, bl, bl.length(), 0); -} - -int libradosstriper::RadosStriperImpl::read(const std::string& soid, - bufferlist* bl, - size_t len, - uint64_t off) -{ - // create a completion object - librados::AioCompletionImpl c; - // call asynchronous method - int rc = aio_read(soid, &c, bl, len, off); - // and wait for completion - if (!rc) { - // wait for completion - c.wait_for_complete_and_cb(); - // return result - rc = c.get_return_value(); - } - return rc; -} - -///////////////////////// asynchronous io ///////////////////////////// - -int libradosstriper::RadosStriperImpl::aio_write(const std::string& soid, - librados::AioCompletionImpl *c, - const bufferlist& bl, - size_t len, - uint64_t off) -{ - ceph_file_layout layout; - std::string lockCookie; - int rc = createAndOpenStripedObject(soid, &layout, len+off, &lockCookie, true); - if (rc) return rc; - return aio_write_in_open_object(soid, c, layout, lockCookie, bl, len, off); -} - -int 
libradosstriper::RadosStriperImpl::aio_append(const std::string& soid, - librados::AioCompletionImpl *c, - const bufferlist& bl, - size_t len) -{ - ceph_file_layout layout; - uint64_t size = len; - std::string lockCookie; - int rc = openStripedObjectForWrite(soid, &layout, &size, &lockCookie, false); - if (rc) return rc; - // create a completion object - return aio_write_in_open_object(soid, c, layout, lockCookie, bl, len, size); -} - -int libradosstriper::RadosStriperImpl::aio_write_full(const std::string& soid, - librados::AioCompletionImpl *c, - const bufferlist& bl) -{ - int rc = trunc(soid, 0); - if (rc) return rc; - return aio_write(soid, c, bl, bl.length(), 0); -} - -static void rados_read_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); - cdata->complete_unlock(comp->rval); - cdata->put(); -} - -static void striper_read_aio_req_complete(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - // launch the async unlocking of the object - cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion); - // complete the read part in parallel - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); - cdata->complete_read(comp->rval); -} - -static void rados_req_read_safe(rados_completion_t c, void *arg) -{ - auto data = reinterpret_cast(arg); - int rc = rados_aio_get_return_value(c); - // ENOENT means that we are dealing with a sparse file. 
This is fine, - // data (0s) will be created on the fly by the rados_req_read_complete method - if (rc == -ENOENT) rc = 0; - auto multiAioComp = data->m_multiAioCompl; - multiAioComp->safe_request(rc); - data->put(); -} - -static void rados_req_read_complete(rados_completion_t c, void *arg) -{ - auto data = reinterpret_cast(arg); - int rc = rados_aio_get_return_value(c); - // We need to handle the case of sparse files here - if (rc == -ENOENT) { - // the object did not exist at all. This can happen for sparse files. - // we consider we've read 0 bytes and it will fall into next case - rc = 0; - } - if (rc >= 0 && (((uint64_t)rc) < data->m_expectedBytes)) { - // only partial data were present in the object (or the object did not - // even exist if we've gone through previous case). - // This is typical of sparse file and we need to complete with 0s. - unsigned int lenOfZeros = data->m_expectedBytes-rc; - unsigned int existingDataToZero = min(data->m_bl->length()-rc, lenOfZeros); - if (existingDataToZero > 0) { - data->m_bl->zero(rc, existingDataToZero); - } - if (lenOfZeros > existingDataToZero) { - ceph::bufferptr zeros(ceph::buffer::create(lenOfZeros-existingDataToZero)); - zeros.zero(); - data->m_bl->push_back(zeros); - } - rc = data->m_expectedBytes; - } - auto multiAioComp = data->m_multiAioCompl; - multiAioComp->complete_request(rc); - data->put(); -} - -int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid, - librados::AioCompletionImpl *c, - bufferlist* bl, - size_t len, - uint64_t off) -{ - // open the object. This will retrieve its layout and size - // and take a shared lock on it - ceph_file_layout layout; - uint64_t size; - std::string lockCookie; - int rc = openStripedObjectForRead(soid, &layout, &size, &lockCookie); - if (rc) return rc; - // find out the actual number of bytes we can read - uint64_t read_len; - if (off >= size) { - // nothing to read ! We are done. 
- read_len = 0; - } else { - read_len = min(len, (size_t)(size-off)); - } - // get list of extents to be read from - vector *extents = new vector(); - if (read_len > 0) { - std::string format = soid; - boost::replace_all(format, "%", "%%"); - format += RADOS_OBJECT_EXTENSION_FORMAT; - file_layout_t l; - l.from_legacy(layout); - Striper::file_to_extents(cct(), format.c_str(), &l, off, read_len, - 0, *extents); - } - - // create a completion object and transfer ownership of extents and resultbl - vector *resultbl = new vector(extents->size()); - ReadCompletionData *cdata = new ReadCompletionData(this, soid, lockCookie, c, - bl, extents, resultbl, 1); - c->is_read = true; - c->io = m_ioCtxImpl; - // create a completion for the unlocking of the striped object at the end of the read - librados::AioCompletion *unlock_completion = - librados::Rados::aio_create_completion(cdata, rados_read_aio_unlock_complete, 0); - cdata->m_unlockCompletion = unlock_completion; - // create the multiCompletion object handling the reads - MultiAioCompletionImplPtr nc{new libradosstriper::MultiAioCompletionImpl, - false}; - nc->set_complete_callback(cdata, striper_read_aio_req_complete); - // go through the extents - int r = 0, i = 0; - for (vector::iterator p = extents->begin(); p != extents->end(); ++p) { - // create a buffer list describing where to place data read from current extend - bufferlist *oid_bl = &((*resultbl)[i++]); - for (vector >::iterator q = p->buffer_extents.begin(); - q != p->buffer_extents.end(); - ++q) { - bufferlist buffer_bl; - buffer_bl.substr_of(*bl, q->first, q->second); - oid_bl->append(buffer_bl); - } - // read all extends of a given object in one go - nc->add_request(); - // we need 2 references on data as both rados_req_read_safe and rados_req_read_complete - // will release one - RadosReadCompletionData *data = new RadosReadCompletionData(nc, p->length, oid_bl, cct(), 2); - librados::AioCompletion *rados_completion = - 
librados::Rados::aio_create_completion(data, rados_req_read_complete, rados_req_read_safe); - r = m_ioCtx.aio_read(p->oid.name, rados_completion, oid_bl, p->length, p->offset); - rados_completion->release(); - if (r < 0) - break; - } - nc->finish_adding_requests(); - return r; -} - -int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid, - librados::AioCompletionImpl *c, - char* buf, - size_t len, - uint64_t off) -{ - // create a buffer list and store it inside the completion object - c->bl.clear(); - c->bl.push_back(buffer::create_static(len, buf)); - // call the bufferlist version of this method - return aio_read(soid, c, &c->bl, len, off); -} - -int libradosstriper::RadosStriperImpl::aio_flush() -{ - int ret; - // pass to the rados level - ret = m_ioCtx.aio_flush(); - if (ret < 0) - return ret; - //wait all CompletionData are released - lock.Lock(); - while (m_refCnt > 1) - cond.Wait(lock); - lock.Unlock(); - return ret; -} - -///////////////////////// stat and deletion ///////////////////////////// - -int libradosstriper::RadosStriperImpl::stat(const std::string& soid, uint64_t *psize, time_t *pmtime) -{ - // create a completion object - librados::AioCompletionImpl c; - // call asynchronous version of stat - int rc = aio_stat(soid, &c, psize, pmtime); - if (rc == 0) { - // wait for completion of the remove - c.wait_for_complete(); - // get result - rc = c.get_return_value(); - } - return rc; -} - -static void striper_stat_aio_stat_complete(rados_completion_t c, void *arg) { - auto data = reinterpret_cast(arg); - int rc = rados_aio_get_return_value(c); - if (rc == -ENOENT) { - // remember this has failed - data->m_statRC = rc; - } - data->m_multiCompletion->complete_request(rc); - data->put(); -} - -static void striper_stat_aio_getxattr_complete(rados_completion_t c, void *arg) { - auto data = reinterpret_cast(arg); - int rc = rados_aio_get_return_value(c); - // We need to handle the case of sparse files here - if (rc < 0) { - // remember this 
has failed - data->m_getxattrRC = rc; - } else { - // this intermediate string allows to add a null terminator before calling strtol - std::string err; - std::string strsize(data->m_bl.c_str(), data->m_bl.length()); - *data->m_psize = strict_strtoll(strsize.c_str(), 10, &err); - if (!err.empty()) { - lderr(data->m_striper->cct()) << XATTR_SIZE << " : " << err << dendl; - data->m_getxattrRC = -EINVAL; - } - rc = 0; - } - data->m_multiCompletion->complete_request(rc); - data->put(); -} - -static void striper_stat_aio_req_complete(rados_striper_multi_completion_t c, - void *arg) { - auto data = reinterpret_cast(arg); - if (data->m_statRC) { - data->complete(data->m_statRC); - } else { - if (data->m_getxattrRC < 0) { - data->complete(data->m_getxattrRC); - } else { - data->complete(0); - } - } - data->put(); -} - -template -int libradosstriper::RadosStriperImpl::aio_generic_stat -(const std::string& soid, - librados::AioCompletionImpl *c, - uint64_t *psize, - TimeType *pmtime, - typename libradosstriper::RadosStriperImpl::StatFunction::Type statFunction) -{ - // use a MultiAioCompletion object for dealing with the fact - // that we'll do 2 asynchronous calls in parallel - MultiAioCompletionImplPtr multi_completion{ - new libradosstriper::MultiAioCompletionImpl, false}; - // Data object used for passing context to asynchronous calls - std::string firstObjOid = getObjectId(soid, 0); - StatCompletionData *cdata = - new StatCompletionData(this, firstObjOid, c, - multi_completion.get(), psize, pmtime, 4); - multi_completion->set_complete_callback(cdata, striper_stat_aio_req_complete); - // use a regular AioCompletion for the stat async call - librados::AioCompletion *stat_completion = - librados::Rados::aio_create_completion(cdata, striper_stat_aio_stat_complete, 0); - multi_completion->add_safe_request(); - object_t obj(firstObjOid); - int rc = (m_ioCtxImpl->*statFunction)(obj, stat_completion->pc, - &cdata->m_objectSize, cdata->m_pmtime); - stat_completion->release(); - 
if (rc < 0) { - // nothing is really started so cancel everything - delete cdata; - return rc; - } - // use a regular AioCompletion for the getxattr async call - librados::AioCompletion *getxattr_completion = - librados::Rados::aio_create_completion(cdata, striper_stat_aio_getxattr_complete, 0); - multi_completion->add_safe_request(); - // in parallel, get the pmsize from the first object asynchronously - rc = m_ioCtxImpl->aio_getxattr(obj, getxattr_completion->pc, - XATTR_SIZE, cdata->m_bl); - getxattr_completion->release(); - multi_completion->finish_adding_requests(); - if (rc < 0) { - // the async stat is ongoing, so we need to go on - // we mark the getxattr as failed in the data object - cdata->m_getxattrRC = rc; - multi_completion->complete_request(rc); - return rc; - } - cdata->put(); - return 0; -} - -int libradosstriper::RadosStriperImpl::aio_stat(const std::string& soid, - librados::AioCompletionImpl *c, - uint64_t *psize, - time_t *pmtime) -{ - return aio_generic_stat(soid, c, psize, pmtime, &librados::IoCtxImpl::aio_stat); -} - -int libradosstriper::RadosStriperImpl::stat2(const std::string& soid, uint64_t *psize, struct timespec *pts) -{ - // create a completion object - librados::AioCompletionImpl c; - // call asynchronous version of stat - int rc = aio_stat2(soid, &c, psize, pts); - if (rc == 0) { - // wait for completion of the remove - c.wait_for_complete_and_cb(); - // get result - rc = c.get_return_value(); - } - return rc; -} - -int libradosstriper::RadosStriperImpl::aio_stat2(const std::string& soid, - librados::AioCompletionImpl *c, - uint64_t *psize, - struct timespec *pts) -{ - return aio_generic_stat(soid, c, psize, pts, &librados::IoCtxImpl::aio_stat2); -} - -static void rados_req_remove_complete(rados_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - int rc = rados_aio_get_return_value(c); - // in case the object did not exist, it means we had a sparse file, all is fine - if (rc == -ENOENT) { - rc = 0; - } - 
cdata->m_multiAioCompl->complete_request(rc); - cdata->put(); -} - -static void rados_req_remove_safe(rados_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - int rc = rados_aio_get_return_value(c); - // in case the object did not exist, it means we had a sparse file, all is fine - if (rc == -ENOENT) { - rc = 0; - } - cdata->m_multiAioCompl->safe_request(rc); - cdata->put(); -} - -static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); - ldout(cdata->m_striper->cct(), 10) - << "RadosStriperImpl : striper_remove_aio_req_complete called for " - << cdata->m_soid << dendl; - int rc = comp->rval; - if (rc == 0) { - // All went fine, synchronously remove first object - rc = cdata->m_striper->m_ioCtx.remove(cdata->m_striper->getObjectId(cdata->m_soid, 0), - cdata->flags); - } else { - lderr(cdata->m_striper->cct()) - << "RadosStriperImpl : deletion/truncation incomplete for " << cdata->m_soid - << ", as errors were encountered. 
The file is left present but it's content " - << " has been partially removed" - << dendl; - } - cdata->complete(rc); - cdata->put(); -} - -int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags) -{ - // create a completion object - librados::AioCompletionImpl c; - // call asynchronous version of remove - int rc = aio_remove(soid, &c, flags); - if (rc == 0) { - // wait for completion of the remove - c.wait_for_complete_and_cb(); - // get result - rc = c.get_return_value(); - } - return rc; -} - -int libradosstriper::RadosStriperImpl::aio_remove(const std::string& soid, - librados::AioCompletionImpl *c, - int flags) -{ - // the RemoveCompletionData object will lock the given soid for the duration - // of the removal - std::string lockCookie = getUUID(); - int rc = m_ioCtx.lock_exclusive(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie, "", 0, 0); - if (rc) return rc; - // create CompletionData for the async remove call - RemoveCompletionData *cdata = new RemoveCompletionData(this, soid, lockCookie, c, flags); - MultiAioCompletionImplPtr multi_completion{ - new libradosstriper::MultiAioCompletionImpl, false}; - multi_completion->set_complete_callback(cdata, striper_remove_aio_req_complete); - // call asynchronous internal version of remove - ldout(cct(), 10) - << "RadosStriperImpl : Aio_remove starting for " - << soid << dendl; - rc = internal_aio_remove(soid, multi_completion); - return rc; -} - -int libradosstriper::RadosStriperImpl::internal_aio_remove( - const std::string& soid, - MultiAioCompletionImplPtr multi_completion, - int flags) -{ - std::string firstObjOid = getObjectId(soid, 0); - try { - // check size and get number of rados objects to delete - uint64_t nb_objects = 0; - bufferlist bl2; - int rc = getxattr(soid, XATTR_SIZE, bl2); - if (rc < 0) { - // no object size (or not able to get it) - // try to find the number of object "by hand" - uint64_t psize; - time_t pmtime; - while (!m_ioCtx.stat(getObjectId(soid, nb_objects), 
&psize, &pmtime)) { - nb_objects++; - } - } else { - // count total number of rados objects in the striped object - std::string err; - // this intermediate string allows to add a null terminator before calling strtol - std::string strsize(bl2.c_str(), bl2.length()); - uint64_t size = strict_strtoll(strsize.c_str(), 10, &err); - if (!err.empty()) { - lderr(cct()) << XATTR_SIZE << " : " << err << dendl; - - return -EINVAL; - } - uint64_t object_size = m_layout.fl_object_size; - uint64_t su = m_layout.fl_stripe_unit; - uint64_t stripe_count = m_layout.fl_stripe_count; - uint64_t nb_complete_sets = size / (object_size*stripe_count); - uint64_t remaining_data = size % (object_size*stripe_count); - uint64_t remaining_stripe_units = (remaining_data + su -1) / su; - uint64_t remaining_objects = std::min(remaining_stripe_units, stripe_count); - nb_objects = nb_complete_sets * stripe_count + remaining_objects; - } - // delete rados objects in reverse order - // Note that we do not drop the first object. 
This one will only be dropped - // if all other removals have been successful, and this is done in the - // callback of the multi_completion object - int rcr = 0; - for (int i = nb_objects-1; i >= 1; i--) { - multi_completion->add_request(); - RadosRemoveCompletionData *data = - new RadosRemoveCompletionData(multi_completion, cct()); - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(data, - rados_req_remove_complete, - rados_req_remove_safe); - if (flags == 0) { - rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion); - } else { - rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion, flags); - } - rados_completion->release(); - if (rcr < 0 and -ENOENT != rcr) { - lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid - << ", as " << getObjectId(soid, i) << " could not be deleted (rc=" << rc << ")" - << dendl; - break; - } - } - // we are over adding requests to the multi_completion object - multi_completion->finish_adding_requests(); - // return - return rcr; - } catch (ErrorCode &e) { - // errror caught when trying to take the exclusive lock - return e.m_code; - } - -} - -int libradosstriper::RadosStriperImpl::trunc(const std::string& soid, uint64_t size) -{ - // lock the object in exclusive mode - std::string firstObjOid = getObjectId(soid, 0); - librados::ObjectWriteOperation op; - op.assert_exists(); - std::string lockCookie = RadosStriperImpl::getUUID(); - utime_t dur = utime_t(); - rados::cls::lock::lock(&op, RADOS_LOCK_NAME, LOCK_EXCLUSIVE, lockCookie, "", "", dur, 0); - int rc = m_ioCtx.operate(firstObjOid, &op); - if (rc) return rc; - // load layout and size - ceph_file_layout layout; - uint64_t original_size; - rc = internal_get_layout_and_size(firstObjOid, &layout, &original_size); - if (!rc) { - if (size < original_size) { - rc = truncate(soid, original_size, size, layout); - } else if (size > original_size) { - rc = grow(soid, original_size, size, layout); - } - } - // 
unlock object, ignore return code as we cannot do much - m_ioCtx.unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie); - // final return - return rc; -} - - -///////////////////////// private helpers ///////////////////////////// - -std::string libradosstriper::RadosStriperImpl::getObjectId(const object_t& soid, - long long unsigned objectno) -{ - std::ostringstream s; - s << soid << '.' << std::setfill ('0') << std::setw(16) << std::hex << objectno; - return s.str(); -} - -void libradosstriper::RadosStriperImpl::unlockObject(const std::string& soid, - const std::string& lockCookie) -{ - // unlock the shared lock on the first rados object - std::string firstObjOid = getObjectId(soid, 0); - m_ioCtx.unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie); -} - -void libradosstriper::RadosStriperImpl::aio_unlockObject(const std::string& soid, - const std::string& lockCookie, - librados::AioCompletion *c) -{ - // unlock the shared lock on the first rados object - std::string firstObjOid = getObjectId(soid, 0); - m_ioCtx.aio_unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie, c); -} - -static void rados_write_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); - cdata->complete_unlock(comp->rval); - cdata->put(); -} - -static void striper_write_aio_req_complete(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - // launch the async unlocking of the object - cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion); - // complete the write part in parallel - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); - cdata->complete_write(comp->rval); - cdata->put(); -} - -static void striper_write_aio_req_safe(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); 
- cdata->safe(comp->rval); - cdata->put(); -} - -int libradosstriper::RadosStriperImpl::write_in_open_object(const std::string& soid, - const ceph_file_layout& layout, - const std::string& lockCookie, - const bufferlist& bl, - size_t len, - uint64_t off) { - // create a completion object to be passed to the callbacks of the multicompletion - // we need 3 references as striper_write_aio_req_complete will release two and - // striper_write_aio_req_safe will release one - WriteCompletionData *cdata = new WriteCompletionData(this, soid, lockCookie, 0, 3); - cdata->get(); // local ref - // create a completion object for the unlocking of the striped object at the end of the write - librados::AioCompletion *unlock_completion = - librados::Rados::aio_create_completion(cdata, rados_write_aio_unlock_complete, 0); - cdata->m_unlockCompletion = unlock_completion; - // create the multicompletion that will handle the write completion - MultiAioCompletionImplPtr c{new libradosstriper::MultiAioCompletionImpl, - false}; - c->set_complete_callback(cdata, striper_write_aio_req_complete); - c->set_safe_callback(cdata, striper_write_aio_req_safe); - // call the asynchronous API - int rc = internal_aio_write(soid, c, bl, len, off, layout); - if (!rc) { - // wait for completion and safety of data - c->wait_for_complete_and_cb(); - c->wait_for_safe_and_cb(); - // wait for the unlocking - unlock_completion->wait_for_complete(); - // return result - rc = c->get_return_value(); - } - cdata->put(); - return rc; -} - -int libradosstriper::RadosStriperImpl::aio_write_in_open_object(const std::string& soid, - librados::AioCompletionImpl *c, - const ceph_file_layout& layout, - const std::string& lockCookie, - const bufferlist& bl, - size_t len, - uint64_t off) { - // create a completion object to be passed to the callbacks of the multicompletion - // we need 3 references as striper_write_aio_req_complete will release two and - // striper_write_aio_req_safe will release one - WriteCompletionData 
*cdata = new WriteCompletionData(this, soid, lockCookie, c, 3); - cdata->get(); // local ref - m_ioCtxImpl->get(); - c->io = m_ioCtxImpl; - // create a completion object for the unlocking of the striped object at the end of the write - librados::AioCompletion *unlock_completion = - librados::Rados::aio_create_completion(cdata, rados_write_aio_unlock_complete, 0); - cdata->m_unlockCompletion = unlock_completion; - // create the multicompletion that will handle the write completion - libradosstriper::MultiAioCompletionImplPtr nc{ - new libradosstriper::MultiAioCompletionImpl, false}; - nc->set_complete_callback(cdata, striper_write_aio_req_complete); - nc->set_safe_callback(cdata, striper_write_aio_req_safe); - // internal asynchronous API - int rc = internal_aio_write(soid, nc, bl, len, off, layout); - cdata->put(); - return rc; -} - -static void rados_req_write_safe(rados_completion_t c, void *arg) -{ - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(arg); - comp->safe_request(rados_aio_get_return_value(c)); -} - -static void rados_req_write_complete(rados_completion_t c, void *arg) -{ - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(arg); - comp->complete_request(rados_aio_get_return_value(c)); -} - -int -libradosstriper::RadosStriperImpl::internal_aio_write(const std::string& soid, - libradosstriper::MultiAioCompletionImplPtr c, - const bufferlist& bl, - size_t len, - uint64_t off, - const ceph_file_layout& layout) -{ - int r = 0; - // Do not try anything if we are called with empty buffer, - // file_to_extents would raise an exception - if (len > 0) { - // get list of extents to be written to - vector extents; - std::string format = soid; - boost::replace_all(format, "%", "%%"); - format += RADOS_OBJECT_EXTENSION_FORMAT; - file_layout_t l; - l.from_legacy(layout); - Striper::file_to_extents(cct(), format.c_str(), &l, off, len, 0, extents); - // go through the extents - for (vector::iterator p = extents.begin(); p != 
extents.end(); ++p) { - // assemble pieces of a given object into a single buffer list - bufferlist oid_bl; - for (vector >::iterator q = p->buffer_extents.begin(); - q != p->buffer_extents.end(); - ++q) { - bufferlist buffer_bl; - buffer_bl.substr_of(bl, q->first, q->second); - oid_bl.append(buffer_bl); - } - // and write the object - c->add_request(); - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(c.get(), - rados_req_write_complete, - rados_req_write_safe); - r = m_ioCtx.aio_write(p->oid.name, rados_completion, oid_bl, - p->length, p->offset); - rados_completion->release(); - if (r < 0) - break; - } - } - c->finish_adding_requests(); - return r; -} - -int libradosstriper::RadosStriperImpl::extract_uint32_attr -(std::map &attrs, - const std::string& key, - ceph_le32 *value) -{ - std::map::iterator attrsIt = attrs.find(key); - if (attrsIt != attrs.end()) { - // this intermediate string allows to add a null terminator before calling strtol - std::string strvalue(attrsIt->second.c_str(), attrsIt->second.length()); - std::string err; - *value = strict_strtol(strvalue.c_str(), 10, &err); - if (!err.empty()) { - lderr(cct()) << key << " : " << err << dendl; - return -EINVAL; - } - } else { - return -ENOENT; - } - return 0; -} - -int libradosstriper::RadosStriperImpl::extract_sizet_attr -(std::map &attrs, - const std::string& key, - size_t *value) -{ - std::map::iterator attrsIt = attrs.find(key); - if (attrsIt != attrs.end()) { - // this intermediate string allows to add a null terminator before calling strtol - std::string strvalue(attrsIt->second.c_str(), attrsIt->second.length()); - std::string err; - *value = strict_strtoll(strvalue.c_str(), 10, &err); - if (!err.empty()) { - lderr(cct()) << key << " : " << err << dendl; - return -EINVAL; - } - } else { - return -ENOENT; - } - return 0; -} - -int libradosstriper::RadosStriperImpl::internal_get_layout_and_size( - const std::string& oid, - ceph_file_layout *layout, - uint64_t 
*size) -{ - // get external attributes of the first rados object - std::map attrs; - int rc = m_ioCtx.getxattrs(oid, attrs); - if (rc) return rc; - // deal with stripe_unit - rc = extract_uint32_attr(attrs, XATTR_LAYOUT_STRIPE_UNIT, &layout->fl_stripe_unit); - if (rc) return rc; - // deal with stripe_count - rc = extract_uint32_attr(attrs, XATTR_LAYOUT_STRIPE_COUNT, &layout->fl_stripe_count); - if (rc) return rc; - // deal with object_size - rc = extract_uint32_attr(attrs, XATTR_LAYOUT_OBJECT_SIZE, &layout->fl_object_size); - if (rc) return rc; - // deal with size - size_t ssize; - rc = extract_sizet_attr(attrs, XATTR_SIZE, &ssize); - if (rc) { - return rc; - } - *size = ssize; - // make valgrind happy by setting unused fl_pg_pool - layout->fl_pg_pool = 0; - return 0; -} - -int libradosstriper::RadosStriperImpl::openStripedObjectForRead( - const std::string& soid, - ceph_file_layout *layout, - uint64_t *size, - std::string *lockCookie) -{ - // take a lock the first rados object, if it exists and gets its size - // check, lock and size reading must be atomic and are thus done within a single operation - librados::ObjectWriteOperation op; - op.assert_exists(); - *lockCookie = getUUID(); - utime_t dur = utime_t(); - rados::cls::lock::lock(&op, RADOS_LOCK_NAME, LOCK_SHARED, *lockCookie, "Tag", "", dur, 0); - std::string firstObjOid = getObjectId(soid, 0); - int rc = m_ioCtx.operate(firstObjOid, &op); - if (rc) { - // error case (including -ENOENT) - return rc; - } - rc = internal_get_layout_and_size(firstObjOid, layout, size); - if (rc) { - unlockObject(soid, *lockCookie); - lderr(cct()) << "RadosStriperImpl::openStripedObjectForRead : " - << "could not load layout and size for " - << soid << " : rc = " << rc << dendl; - } - return rc; -} - -int libradosstriper::RadosStriperImpl::openStripedObjectForWrite(const std::string& soid, - ceph_file_layout *layout, - uint64_t *size, - std::string *lockCookie, - bool isFileSizeAbsolute) -{ - // take a lock the first rados 
object, if it exists - // check and lock must be atomic and are thus done within a single operation - librados::ObjectWriteOperation op; - op.assert_exists(); - *lockCookie = getUUID(); - utime_t dur = utime_t(); - rados::cls::lock::lock(&op, RADOS_LOCK_NAME, LOCK_SHARED, *lockCookie, "Tag", "", dur, 0); - std::string firstObjOid = getObjectId(soid, 0); - int rc = m_ioCtx.operate(firstObjOid, &op); - if (rc) { - if (rc == -ENOENT) { - // object does not exist, delegate to createEmptyStripedObject - int rc = createAndOpenStripedObject(soid, layout, *size, lockCookie, isFileSizeAbsolute); - // return original size - *size = 0; - return rc; - } else { - return rc; - } - } - // all fine - uint64_t curSize; - rc = internal_get_layout_and_size(firstObjOid, layout, &curSize); - if (rc) { - unlockObject(soid, *lockCookie); - lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : " - << "could not load layout and size for " - << soid << " : rc = " << rc << dendl; - return rc; - } - // atomically update object size, only if smaller than current one - if (!isFileSizeAbsolute) - *size += curSize; - librados::ObjectWriteOperation writeOp; - writeOp.cmpxattr(XATTR_SIZE, LIBRADOS_CMPXATTR_OP_GT, *size); - std::ostringstream oss; - oss << *size; - bufferlist bl; - bl.append(oss.str()); - writeOp.setxattr(XATTR_SIZE, bl); - rc = m_ioCtx.operate(firstObjOid, &writeOp); - // return current size - *size = curSize; - // handle case where objectsize is already bigger than size - if (-ECANCELED == rc) - rc = 0; - if (rc) { - unlockObject(soid, *lockCookie); - lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : " - << "could not set new size for " - << soid << " : rc = " << rc << dendl; - } - return rc; -} - -int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::string& soid, - ceph_file_layout *layout, - uint64_t size, - std::string *lockCookie, - bool isFileSizeAbsolute) -{ - // build atomic write operation - librados::ObjectWriteOperation 
writeOp; - writeOp.create(true); - // object_size - std::ostringstream oss_object_size; - oss_object_size << m_layout.fl_object_size; - bufferlist bl_object_size; - bl_object_size.append(oss_object_size.str()); - writeOp.setxattr(XATTR_LAYOUT_OBJECT_SIZE, bl_object_size); - // stripe unit - std::ostringstream oss_stripe_unit; - oss_stripe_unit << m_layout.fl_stripe_unit; - bufferlist bl_stripe_unit; - bl_stripe_unit.append(oss_stripe_unit.str()); - writeOp.setxattr(XATTR_LAYOUT_STRIPE_UNIT, bl_stripe_unit); - // stripe count - std::ostringstream oss_stripe_count; - oss_stripe_count << m_layout.fl_stripe_count; - bufferlist bl_stripe_count; - bl_stripe_count.append(oss_stripe_count.str()); - writeOp.setxattr(XATTR_LAYOUT_STRIPE_COUNT, bl_stripe_count); - // size - std::ostringstream oss_size; - oss_size << (isFileSizeAbsolute?size:0); - bufferlist bl_size; - bl_size.append(oss_size.str()); - writeOp.setxattr(XATTR_SIZE, bl_size); - // effectively change attributes - std::string firstObjOid = getObjectId(soid, 0); - int rc = m_ioCtx.operate(firstObjOid, &writeOp); - // in case of error (but no EEXIST which would mean the object existed), return - if (rc && -EEXIST != rc) return rc; - // Otherwise open the object - uint64_t fileSize = size; - return openStripedObjectForWrite(soid, layout, &fileSize, lockCookie, isFileSizeAbsolute); -} - -static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c, void *arg) -{ - auto cdata = reinterpret_cast(arg); - libradosstriper::MultiAioCompletionImpl *comp = - reinterpret_cast(c); - if (0 == comp->rval) { - // all went fine, change size in the external attributes - std::ostringstream oss; - oss << cdata->m_size; - bufferlist bl; - bl.append(oss.str()); - cdata->m_striper->setxattr(cdata->m_soid, XATTR_SIZE, bl); - } - cdata->put(); -} - -int libradosstriper::RadosStriperImpl::truncate(const std::string& soid, - uint64_t original_size, - uint64_t size, - ceph_file_layout &layout) -{ - TruncateCompletionData 
*cdata = new TruncateCompletionData(this, soid, size); - libradosstriper::MultiAioCompletionImplPtr multi_completion{ - new libradosstriper::MultiAioCompletionImpl, false}; - multi_completion->set_complete_callback(cdata, striper_truncate_aio_req_complete); - // call asynchrous version of truncate - int rc = aio_truncate(soid, multi_completion, original_size, size, layout); - // wait for completion of the truncation - multi_completion->finish_adding_requests(); - multi_completion->wait_for_complete_and_cb(); - // return result - if (rc == 0) { - rc = multi_completion->get_return_value(); - } - return rc; -} - -int libradosstriper::RadosStriperImpl::aio_truncate -(const std::string& soid, - libradosstriper::MultiAioCompletionImplPtr multi_completion, - uint64_t original_size, - uint64_t size, - ceph_file_layout &layout) -{ - // handle the underlying rados objects. 3 cases here : - // -- the objects belonging to object sets entirely located - // before the truncation are unchanged - // -- the objects belonging to the object set where the - // truncation took place are truncated or removed - // -- the objects belonging to object sets entirely located - // after the truncation are removed - // Note that we do it backward and that we change the size in - // the external attributes only at the end. 
This make sure that - // no rados object stays behind if we remove the striped object - // after a truncation has failed - uint64_t trunc_objectsetno = size / layout.fl_object_size / layout.fl_stripe_count; - uint64_t last_objectsetno = original_size / layout.fl_object_size / layout.fl_stripe_count; - bool exists = false; - for (int64_t objectno = (last_objectsetno+1) * layout.fl_stripe_count-1; - objectno >= (int64_t)((trunc_objectsetno + 1) * layout.fl_stripe_count); - objectno--) { - // if no object existed so far, check object existence - if (!exists) { - uint64_t nb_full_object_set = objectno / layout.fl_stripe_count; - uint64_t object_index_in_set = objectno % layout.fl_stripe_count; - uint64_t set_start_off = nb_full_object_set * layout.fl_object_size * layout.fl_stripe_count; - uint64_t object_start_off = set_start_off + object_index_in_set * layout.fl_stripe_unit; - exists = (original_size > object_start_off); - } - if (exists) { - // remove asynchronously - multi_completion->add_request(); - RadosRemoveCompletionData *data = - new RadosRemoveCompletionData(multi_completion, cct()); - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(data, - rados_req_remove_complete, - rados_req_remove_safe); - int rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion); - rados_completion->release(); - // in case the object did not exist, it means we had a sparse file, all is fine - if (rc && rc != -ENOENT) return rc; - } - } - for (int64_t objectno = ((trunc_objectsetno + 1) * layout.fl_stripe_count) -1; - objectno >= (int64_t)(trunc_objectsetno * layout.fl_stripe_count); - objectno--) { - // if no object existed so far, check object existence - if (!exists) { - uint64_t object_start_off = ((objectno / layout.fl_stripe_count) * layout.fl_object_size) + - ((objectno % layout.fl_stripe_count) * layout.fl_stripe_unit); - exists = (original_size > object_start_off); - } - if (exists) { - // truncate - file_layout_t l; - 
l.from_legacy(layout); - uint64_t new_object_size = Striper::object_truncate_size(cct(), &l, objectno, size); - int rc; - if (new_object_size > 0 or 0 == objectno) { - // trunc is synchronous as there is no async version - // but note that only a single object will be truncated - // reducing the overload to a fixed amount - rc = m_ioCtx.trunc(getObjectId(soid, objectno), new_object_size); - } else { - // removes are asynchronous in order to speed up truncations of big files - multi_completion->add_request(); - RadosRemoveCompletionData *data = - new RadosRemoveCompletionData(multi_completion, cct()); - librados::AioCompletion *rados_completion = - librados::Rados::aio_create_completion(data, - rados_req_remove_complete, - rados_req_remove_safe); - rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion); - rados_completion->release(); - } - // in case the object did not exist, it means we had a sparse file, all is fine - if (rc && rc != -ENOENT) return rc; - } - } - return 0; -} - -int libradosstriper::RadosStriperImpl::grow(const std::string& soid, - uint64_t original_size, - uint64_t size, - ceph_file_layout &layout) -{ - // handle the underlying rados objects. As we support sparse objects, - // we only have to change the size in the external attributes - std::ostringstream oss; - oss << size; - bufferlist bl; - bl.append(oss.str()); - int rc = m_ioCtx.setxattr(getObjectId(soid, 0), XATTR_SIZE, bl); - return rc; -} - -std::string libradosstriper::RadosStriperImpl::getUUID() -{ - struct uuid_d uuid; - uuid.generate_random(); - char suuid[37]; - uuid.print(suuid); - return std::string(suuid); -}