#ifndef __LIBRADOS_HPP #define __LIBRADOS_HPP #include #include #include #include #include #include #include #include "memory.h" #include "buffer.h" #include "librados.h" #include "rados_types.hpp" namespace libradosstriper { class RadosStriper; } namespace librados { using ceph::bufferlist; struct AioCompletionImpl; class IoCtx; struct IoCtxImpl; class ObjectOperationImpl; struct ObjListCtx; struct PoolAsyncCompletionImpl; class RadosClient; struct ListObjectImpl; class NObjectIteratorImpl; typedef void *list_ctx_t; typedef uint64_t auid_t; typedef void *config_t; typedef struct rados_cluster_stat_t cluster_stat_t; typedef struct rados_pool_stat_t pool_stat_t; typedef struct { std::string client; std::string cookie; std::string address; } locker_t; typedef std::map stats_map; typedef void *completion_t; /** Signature of a completion callback: receives the completion handle and a caller-supplied argument. */ typedef void (*callback_t)(completion_t cb, void *arg); /** Describes one listed object by namespace, object id and locator; this is the type an NObjectIterator dereferences to. */ class CEPH_RADOS_API ListObject { public: const std::string& get_nspace() const; const std::string& get_oid() const; const std::string& get_locator() const; ListObject(); ~ListObject(); ListObject( const ListObject&); ListObject& operator=(const ListObject& rhs); private: ListObject(ListObjectImpl *impl); friend class NObjectIteratorImpl; friend std::ostream& operator<<(std::ostream& out, const ListObject& lop); ListObjectImpl *impl; }; CEPH_RADOS_API std::ostream& operator<<(std::ostream& out, const librados::ListObject& lop); class CEPH_RADOS_API NObjectIterator; /** Wraps a rados_object_list_cursor (c_cursor) and adds ordering/equality comparison plus conversion to and from a string. */ class CEPH_RADOS_API ObjectCursor { public: ObjectCursor(); ObjectCursor(const ObjectCursor &rhs); explicit ObjectCursor(rados_object_list_cursor c); ~ObjectCursor(); ObjectCursor& operator=(const ObjectCursor& rhs); bool operator<(const ObjectCursor &rhs) const; bool operator==(const ObjectCursor &rhs) const; void set(rados_object_list_cursor c); friend class IoCtx; friend class NObjectIteratorImpl; friend std::ostream& operator<<(std::ostream& os, const librados::ObjectCursor& oc); std::string to_str() const; bool from_str(const std::string& s); 
protected: rados_object_list_cursor c_cursor; }; CEPH_RADOS_API std::ostream& operator<<(std::ostream& os, const librados::ObjectCursor& oc); /** Iterator over listed objects; dereferences to a ListObject. State lives in an NObjectIteratorImpl (impl), which is NULL for a default-constructed iterator. */ class CEPH_RADOS_API NObjectIterator : public std::iterator { public: static const NObjectIterator __EndObjectIterator; NObjectIterator(): impl(NULL) {} ~NObjectIterator(); NObjectIterator(const NObjectIterator &rhs); NObjectIterator& operator=(const NObjectIterator& rhs); bool operator==(const NObjectIterator& rhs) const; bool operator!=(const NObjectIterator& rhs) const; const ListObject& operator*() const; const ListObject* operator->() const; NObjectIterator &operator++(); // Preincrement NObjectIterator operator++(int); // Postincrement friend class IoCtx; friend class NObjectIteratorImpl; /// get current hash position of the iterator, rounded to the current pg uint32_t get_pg_hash_position() const; /// move the iterator to a given hash position. this may (will!) be rounded to the nearest pg. uint32_t seek(uint32_t pos); /// move the iterator to a given cursor position uint32_t seek(const ObjectCursor& cursor); /// get current cursor position ObjectCursor get_cursor(); /** * Configure PGLS filter to be applied OSD-side (requires caller * to know/understand the format expected by the OSD) */ void set_filter(const bufferlist &bl); private: NObjectIterator(ObjListCtx *ctx_); void get_next(); NObjectIteratorImpl *impl; }; /** Plain record naming an object by oid, namespace and locator. */ class CEPH_RADOS_API ObjectItem { public: std::string oid; std::string nspace; std::string locator; }; /// DEPRECATED; do not use class CEPH_RADOS_API WatchCtx { public: virtual ~WatchCtx(); virtual void notify(uint8_t opcode, uint64_t ver, bufferlist& bl) = 0; }; /** Callback interface for watches: handle_notify() is called for notify events, handle_error() for errors with the watch. */ class CEPH_RADOS_API WatchCtx2 { public: virtual ~WatchCtx2(); /** * Callback activated when we receive a notify event. 
* * @param notify_id unique id for this notify event * @param cookie the watcher we are notifying * @param notifier_id the unique client id of the notifier * @param bl opaque notify payload (from the notifier) */ virtual void handle_notify(uint64_t notify_id, uint64_t cookie, uint64_t notifier_id, bufferlist& bl) = 0; /** * Callback activated when we encounter an error with the watch. * * Errors we may see: * -ENOTCONN : our watch was disconnected * -ETIMEDOUT : our watch is still valid, but we may have missed * a notify event. * * @param cookie the watcher with the problem * @param err error */ virtual void handle_error(uint64_t cookie, int err) = 0; }; /** Handle for an asynchronous I/O operation; holds the AioCompletionImpl pointer (pc) given to the constructor. get_version() is marked deprecated; get_version64() returns the version as uint64_t. */ struct CEPH_RADOS_API AioCompletion { AioCompletion(AioCompletionImpl *pc_) : pc(pc_) {} int set_complete_callback(void *cb_arg, callback_t cb); int set_safe_callback(void *cb_arg, callback_t cb); int wait_for_complete(); int wait_for_safe(); int wait_for_complete_and_cb(); int wait_for_safe_and_cb(); bool is_complete(); bool is_safe(); bool is_complete_and_cb(); bool is_safe_and_cb(); int get_return_value(); int get_version() __attribute__ ((deprecated)); uint64_t get_version64(); void release(); AioCompletionImpl *pc; }; /** Handle for an asynchronous pool operation; holds the PoolAsyncCompletionImpl pointer (pc) given to the constructor. */ struct CEPH_RADOS_API PoolAsyncCompletion { PoolAsyncCompletion(PoolAsyncCompletionImpl *pc_) : pc(pc_) {} int set_callback(void *cb_arg, callback_t cb); int wait(); bool is_complete(); int get_return_value(); void release(); PoolAsyncCompletionImpl *pc; }; /** * These are per-op flags which may be different among * ops added to an ObjectOperation. 
*/ enum ObjectOperationFlags { /* each enumerator mirrors the LIBRADOS_OP_FLAG_* constant of the same name */ OP_EXCL = LIBRADOS_OP_FLAG_EXCL, OP_FAILOK = LIBRADOS_OP_FLAG_FAILOK, OP_FADVISE_RANDOM = LIBRADOS_OP_FLAG_FADVISE_RANDOM, OP_FADVISE_SEQUENTIAL = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL, OP_FADVISE_WILLNEED = LIBRADOS_OP_FLAG_FADVISE_WILLNEED, OP_FADVISE_DONTNEED = LIBRADOS_OP_FLAG_FADVISE_DONTNEED, OP_FADVISE_NOCACHE = LIBRADOS_OP_FLAG_FADVISE_NOCACHE, }; /** Callback interface whose handle_completion() receives a result code (r) and an output bufferlist (outbl); an instance can be passed to ObjectOperation::exec(). */ class CEPH_RADOS_API ObjectOperationCompletion { public: virtual ~ObjectOperationCompletion() {} virtual void handle_completion(int r, bufferlist& outbl) = 0; }; /** * These flags apply to the ObjectOperation as a whole. * * BALANCE_READS and LOCALIZE_READS should only be used * when reading from data you're certain won't change, * like a snapshot, or where eventual consistency is ok. * * ORDER_READS_WRITES will order reads the same way writes are * ordered (e.g., waiting for degraded objects). In particular, it * will make a write followed by a read sequence be preserved. * * IGNORE_CACHE will skip the caching logic on the OSD that normally * handles promotion of objects between tiers. This allows an operation * to operate (or read) the cached (or uncached) object, even if it is * not coherent. * * IGNORE_OVERLAY will ignore the pool overlay tiering metadata and * process the op directly on the destination pool. This is useful * for CACHE_FLUSH and CACHE_EVICT operations. 
*/ enum ObjectOperationGlobalFlags { OPERATION_NOFLAG = LIBRADOS_OPERATION_NOFLAG, OPERATION_BALANCE_READS = LIBRADOS_OPERATION_BALANCE_READS, OPERATION_LOCALIZE_READS = LIBRADOS_OPERATION_LOCALIZE_READS, OPERATION_ORDER_READS_WRITES = LIBRADOS_OPERATION_ORDER_READS_WRITES, OPERATION_IGNORE_CACHE = LIBRADOS_OPERATION_IGNORE_CACHE, OPERATION_SKIPRWLOCKS = LIBRADOS_OPERATION_SKIPRWLOCKS, OPERATION_IGNORE_OVERLAY = LIBRADOS_OPERATION_IGNORE_OVERLAY, // send requests to cluster despite the cluster or pool being // marked full; ops will either succeed (e.g., delete) or return // EDQUOT or ENOSPC OPERATION_FULL_TRY = LIBRADOS_OPERATION_FULL_TRY, // mainly for delete OPERATION_FULL_FORCE = LIBRADOS_OPERATION_FULL_FORCE, OPERATION_IGNORE_REDIRECT = LIBRADOS_OPERATION_IGNORE_REDIRECT, }; /* * Alloc hint flags for the alloc_hint operation. * Each flag is a distinct power of two (1 through 512). */ enum AllocHintFlags { ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1, ALLOC_HINT_FLAG_RANDOM_WRITE = 2, ALLOC_HINT_FLAG_SEQUENTIAL_READ = 4, ALLOC_HINT_FLAG_RANDOM_READ = 8, ALLOC_HINT_FLAG_APPEND_ONLY = 16, ALLOC_HINT_FLAG_IMMUTABLE = 32, ALLOC_HINT_FLAG_SHORTLIVED = 64, ALLOC_HINT_FLAG_LONGLIVED = 128, ALLOC_HINT_FLAG_COMPRESSIBLE = 256, ALLOC_HINT_FLAG_INCOMPRESSIBLE = 512, }; /* * ObjectOperation : compound object operation * Batch multiple object operations into a single request, to be applied * atomically. 
*/ class CEPH_RADOS_API ObjectOperation { public: ObjectOperation(); virtual ~ObjectOperation(); size_t size(); void set_op_flags(ObjectOperationFlags flags) __attribute__((deprecated)); // flags means ObjectOperationFlags void set_op_flags2(int flags); void cmpext(uint64_t off, bufferlist& cmp_bl, int *prval); void cmpxattr(const char *name, uint8_t op, const bufferlist& val); void cmpxattr(const char *name, uint8_t op, uint64_t v); void exec(const char *cls, const char *method, bufferlist& inbl); void exec(const char *cls, const char *method, bufferlist& inbl, bufferlist *obl, int *prval); void exec(const char *cls, const char *method, bufferlist& inbl, ObjectOperationCompletion *completion); /** * Guard operation with a check that object version == ver * * @param ver [in] version to check */ void assert_version(uint64_t ver); /** * Guard operation with a check that the object already exists */ void assert_exists(); /** * Guard operation with comparison assertions on the omap values of specified keys * * @param assertions [in] comparison assertions * @param prval [out] place error code in prval upon completion * * assertions has the form of mappings from keys to (comparison rval, assertion) * The assertion field may be CEPH_OSD_CMPXATTR_OP_[GT|LT|EQ]. * * That is, to assert that the value at key 'foo' is greater than 'bar': * * ObjectReadOperation op; * int r; * map<string, pair<bufferlist, int> > assertions; * bufferlist bar(string('bar')); * assertions['foo'] = make_pair(bar, CEPH_OSD_CMPXATTR_OP_GT); * op.omap_cmp(assertions, &r); */ void omap_cmp( const std::map > &assertions, int *prval); protected: ObjectOperationImpl *impl; /* copy constructor and copy assignment are declared protected */ ObjectOperation(const ObjectOperation& rhs); ObjectOperation& operator=(const ObjectOperation& rhs); friend class IoCtx; friend class Rados; }; /* * ObjectWriteOperation : compound object write operation * Batch multiple object operations into a single request, to be applied * atomically. 
*/ class CEPH_RADOS_API ObjectWriteOperation : public ObjectOperation { protected: /* set to NULL by the constructor; not referenced anywhere else in this header */ time_t *unused; public: ObjectWriteOperation() : unused(NULL) {} ~ObjectWriteOperation() override {} void mtime(time_t *pt); void mtime2(struct timespec *pts); void create(bool exclusive); void create(bool exclusive, const std::string& category); ///< NOTE: category is unused void write(uint64_t off, const bufferlist& bl); void write_full(const bufferlist& bl); void writesame(uint64_t off, uint64_t write_len, const bufferlist& bl); void append(const bufferlist& bl); void remove(); void truncate(uint64_t off); void zero(uint64_t off, uint64_t len); void rmxattr(const char *name); void setxattr(const char *name, const bufferlist& bl); void tmap_update(const bufferlist& cmdbl); void tmap_put(const bufferlist& bl); void selfmanaged_snap_rollback(uint64_t snapid); /** * Rollback an object to the specified snapshot id * * Used with pool snapshots * * @param snapid [in] snapshot id specified */ void snap_rollback(uint64_t snapid); /** * set keys and values according to map * * @param map [in] keys and values to set */ void omap_set(const std::map &map); /** * set omap header * * @param bl [in] header to set */ void omap_set_header(const bufferlist &bl); /** * Clears omap contents */ void omap_clear(); /** * Clears keys in to_rm * * @param to_rm [in] keys to remove */ void omap_rm_keys(const std::set &to_rm); /** * Copy an object * * Copies an object from another location. The operation is atomic in that * the copy either succeeds in its entirety or fails (e.g., because the * source object was modified while the copy was in progress). 
* * @param src source object name * @param src_ioctx ioctx for the source object * @param src_version current version of the source object * @param src_fadvise_flags the fadvise flags for source object */ void copy_from(const std::string& src, const IoCtx& src_ioctx, uint64_t src_version); /** copy_from() variant that additionally takes src_fadvise_flags. */ void copy_from2(const std::string& src, const IoCtx& src_ioctx, uint64_t src_version, uint32_t src_fadvise_flags); /** * undirty an object * * Clear an object's dirty flag */ void undirty(); /** * Set allocation hint for an object * * @param expected_object_size expected size of the object, in bytes * @param expected_write_size expected size of writes to the object, in bytes * @param flags hint flags; taken by set_alloc_hint2() only (presumably AllocHintFlags values -- TODO confirm) */ void set_alloc_hint(uint64_t expected_object_size, uint64_t expected_write_size); void set_alloc_hint2(uint64_t expected_object_size, uint64_t expected_write_size, uint32_t flags); /** * Pin/unpin an object in cache tier * * @returns 0 on success, negative error code on failure * * NOTE(review): both methods are declared void, so the @returns above * presumably describes the result of executing the operation -- confirm */ void cache_pin(); void cache_unpin(); /** * Extensible tier * * Set redirect target */ void set_redirect(const std::string& tgt_obj, const IoCtx& tgt_ioctx, uint64_t tgt_version); friend class IoCtx; }; /* * ObjectReadOperation : compound object operation that return value * Batch multiple object operations into a single request, to be applied * atomically. 
*/ class CEPH_RADOS_API ObjectReadOperation : public ObjectOperation { public: ObjectReadOperation() {} ~ObjectReadOperation() override {} void stat(uint64_t *psize, time_t *pmtime, int *prval); void stat2(uint64_t *psize, struct timespec *pts, int *prval); void getxattr(const char *name, bufferlist *pbl, int *prval); void getxattrs(std::map *pattrs, int *prval); /* NOTE(review): off is size_t and len is uint64_t here, the reverse of checksum() and sparse_read() below -- confirm this is intentional */ void read(size_t off, uint64_t len, bufferlist *pbl, int *prval); void checksum(rados_checksum_type_t type, const bufferlist &init_value_bl, uint64_t off, size_t len, size_t chunk_size, bufferlist *pbl, int *prval); /** * see aio_sparse_read() */ void sparse_read(uint64_t off, uint64_t len, std::map *m, bufferlist *data_bl, int *prval); void tmap_get(bufferlist *pbl, int *prval); /** * omap_get_vals: keys and values from the object omap * * Get up to max_return keys and values beginning after start_after * * @param start_after [in] list no keys smaller than start_after * @param max_return [in] list no more than max_return key/value pairs * @param out_vals [out] place returned values in out_vals on completion * @param prval [out] place error code in prval upon completion */ void omap_get_vals( const std::string &start_after, uint64_t max_return, std::map *out_vals, int *prval) __attribute__ ((deprecated)); // use omap_get_vals2 /** * omap_get_vals2: keys and values from the object omap * * Get up to max_return keys and values beginning after start_after * * @param start_after [in] list no keys smaller than start_after * @param max_return [in] list no more than max_return key/value pairs * @param out_vals [out] place returned values in out_vals on completion * @param pmore [out] pointer to bool indicating whether there are more keys * @param prval [out] place error code in prval upon completion */ void omap_get_vals2( const std::string &start_after, uint64_t max_return, std::map *out_vals, bool *pmore, int *prval); /** * omap_get_vals: keys and values from the object omap * * Get up to max_return keys and values beginning after start_after * * @param start_after [in] list keys starting after 
start_after * @param filter_prefix [in] list only keys beginning with filter_prefix * @param max_return [in] list no more than max_return key/value pairs * @param out_vals [out] place returned values in out_vals on completion * @param prval [out] place error code in prval upon completion */ void omap_get_vals( const std::string &start_after, const std::string &filter_prefix, uint64_t max_return, std::map *out_vals, int *prval) __attribute__ ((deprecated)); // use omap_get_vals2 /** * omap_get_vals2: keys and values from the object omap * * Get up to max_return keys and values beginning after start_after * * @param start_after [in] list keys starting after start_after * @param filter_prefix [in] list only keys beginning with filter_prefix * @param max_return [in] list no more than max_return key/value pairs * @param out_vals [out] place returned values in out_vals on completion * @param pmore [out] pointer to bool indicating whether there are more keys * @param prval [out] place error code in prval upon completion */ void omap_get_vals2( const std::string &start_after, const std::string &filter_prefix, uint64_t max_return, std::map *out_vals, bool *pmore, int *prval); /** * omap_get_keys: keys from the object omap * * Get up to max_return keys beginning after start_after * * @param start_after [in] list keys starting after start_after * @param max_return [in] list no more than max_return keys * @param out_keys [out] place returned values in out_keys on completion * @param prval [out] place error code in prval upon completion */ void omap_get_keys(const std::string &start_after, uint64_t max_return, std::set *out_keys, int *prval) __attribute__ ((deprecated)); // use omap_get_keys2 /** * omap_get_keys2: keys from the object omap * * Get up to max_return keys beginning after start_after * * @param start_after [in] list keys starting after start_after * @param max_return [in] list no more than max_return keys * @param out_keys [out] place returned values in out_keys on completion * @param 
pmore [out] pointer to bool indicating whether there are more keys * @param prval [out] place error code in prval upon completion */ void omap_get_keys2(const std::string &start_after, uint64_t max_return, std::set *out_keys, bool *pmore, int *prval); /** * omap_get_header: get header from object omap * * @param header [out] place header here upon completion * @param prval [out] place error code in prval upon completion */ void omap_get_header(bufferlist *header, int *prval); /** * get key/value pairs for specified keys * * @param keys [in] keys to get * @param map [out] place key/value pairs found here on completion * @param prval [out] place error code in prval upon completion */ void omap_get_vals_by_keys(const std::set &keys, std::map *map, int *prval); /** * list_watchers: get the list of watchers of an object * * @param out_watchers [out] place returned values in out_watchers on completion * @param prval [out] place error code in prval upon completion */ void list_watchers(std::list *out_watchers, int *prval); /** * list snapshot clones associated with a logical object * * This will include a record for each version of the object, * including the "HEAD" (which will have a cloneid of SNAP_HEAD). * Each clone includes a vector of snap ids for which it is * defined to exist. * * NOTE: this operation must be submitted from an IoCtx with a * read snapid of SNAP_DIR for reliable results. * * @param out_snaps [out] pointer to resulting snap_set_t * @param prval [out] place error code in prval upon completion */ void list_snaps(snap_set_t *out_snaps, int *prval); /** * query dirty state of an object * * @param isdirty [out] pointer to resulting bool * @param prval [out] place error code in prval upon completion */ void is_dirty(bool *isdirty, int *prval); /** * flush a cache tier object to backing tier; will block racing * updates. * * This should be used in concert with OPERATION_IGNORE_CACHE to avoid * triggering a promotion. 
*/ void cache_flush(); /** * Flush a cache tier object to backing tier; will EAGAIN if we race * with an update. Must be used with the SKIPRWLOCKS flag. * * This should be used in concert with OPERATION_IGNORE_CACHE to avoid * triggering a promotion. */ void cache_try_flush(); /** * evict a clean cache tier object * * This should be used in concert with OPERATION_IGNORE_CACHE to avoid * triggering a promote on the OSD (that is then evicted). */ void cache_evict(); }; /* IoCtx : This is a context in which we can perform I/O. * It includes a Pool, * * Typical use (error checking omitted): * * IoCtx p; * rados.ioctx_create("my_pool", p); * p->stat(&stats); * ... etc ... * * NOTE: be sure to call watch_flush() prior to destroying any IoCtx * that is used for watch events to ensure that racing callbacks * have completed. */ class CEPH_RADOS_API IoCtx { public: IoCtx(); static void from_rados_ioctx_t(rados_ioctx_t p, IoCtx &pool); IoCtx(const IoCtx& rhs); IoCtx& operator=(const IoCtx& rhs); ~IoCtx(); // Close our pool handle void close(); // deep copy void dup(const IoCtx& rhs); // set pool auid int set_auid(uint64_t auid_); // set pool auid int set_auid_async(uint64_t auid_, PoolAsyncCompletion *c); // get pool auid int get_auid(uint64_t *auid_); uint64_t get_instance_id() const; std::string get_pool_name(); bool pool_requires_alignment(); int pool_requires_alignment2(bool * requires); uint64_t pool_required_alignment(); int pool_required_alignment2(uint64_t * alignment); // create an object int create(const std::string& oid, bool exclusive); int create(const std::string& oid, bool exclusive, const std::string& category); ///< category is unused /** * write bytes to an object at a specified offset * * NOTE: this call steals the contents of @param bl. */ int write(const std::string& oid, bufferlist& bl, size_t len, uint64_t off); /** * append bytes to an object * * NOTE: this call steals the contents of @param bl. 
*/ int append(const std::string& oid, bufferlist& bl, size_t len); /** * replace object contents with provided data * * NOTE: this call steals the contents of @param bl. */ int write_full(const std::string& oid, bufferlist& bl); int writesame(const std::string& oid, bufferlist& bl, size_t write_len, uint64_t off); int read(const std::string& oid, bufferlist& bl, size_t len, uint64_t off); int checksum(const std::string& o, rados_checksum_type_t type, const bufferlist &init_value_bl, size_t len, uint64_t off, size_t chunk_size, bufferlist *pbl); int remove(const std::string& oid); int remove(const std::string& oid, int flags); int trunc(const std::string& oid, uint64_t size); int mapext(const std::string& o, uint64_t off, size_t len, std::map& m); int cmpext(const std::string& o, uint64_t off, bufferlist& cmp_bl); int sparse_read(const std::string& o, std::map& m, bufferlist& bl, size_t len, uint64_t off); int getxattr(const std::string& oid, const char *name, bufferlist& bl); int getxattrs(const std::string& oid, std::map& attrset); int setxattr(const std::string& oid, const char *name, bufferlist& bl); int rmxattr(const std::string& oid, const char *name); int stat(const std::string& oid, uint64_t *psize, time_t *pmtime); int stat2(const std::string& oid, uint64_t *psize, struct timespec *pts); int exec(const std::string& oid, const char *cls, const char *method, bufferlist& inbl, bufferlist& outbl); /** * modify object tmap based on encoded update sequence * * NOTE: this call steals the contents of @param bl */ int tmap_update(const std::string& oid, bufferlist& cmdbl); /** * replace object contents with provided encoded tmap data * * NOTE: this call steals the contents of @param bl */ int tmap_put(const std::string& oid, bufferlist& bl); int tmap_get(const std::string& oid, bufferlist& bl); int tmap_to_omap(const std::string& oid, bool nullok=false); int omap_get_vals(const std::string& oid, const std::string& start_after, uint64_t max_return, std::map 
*out_vals); int omap_get_vals2(const std::string& oid, const std::string& start_after, uint64_t max_return, std::map *out_vals, bool *pmore); int omap_get_vals(const std::string& oid, const std::string& start_after, const std::string& filter_prefix, uint64_t max_return, std::map *out_vals); int omap_get_vals2(const std::string& oid, const std::string& start_after, const std::string& filter_prefix, uint64_t max_return, std::map *out_vals, bool *pmore); int omap_get_keys(const std::string& oid, const std::string& start_after, uint64_t max_return, std::set *out_keys); int omap_get_keys2(const std::string& oid, const std::string& start_after, uint64_t max_return, std::set *out_keys, bool *pmore); int omap_get_header(const std::string& oid, bufferlist *bl); int omap_get_vals_by_keys(const std::string& oid, const std::set& keys, std::map *vals); int omap_set(const std::string& oid, const std::map& map); int omap_set_header(const std::string& oid, const bufferlist& bl); int omap_clear(const std::string& oid); int omap_rm_keys(const std::string& oid, const std::set& keys); void snap_set_read(snap_t seq); int selfmanaged_snap_set_write_ctx(snap_t seq, std::vector& snaps); // Create a snapshot with a given name int snap_create(const char *snapname); // Look up a snapshot by name. 
// Returns 0 on success; error code otherwise int snap_lookup(const char *snapname, snap_t *snap); // Gets a timestamp for a snap int snap_get_stamp(snap_t snapid, time_t *t); // Gets the name of a snap int snap_get_name(snap_t snapid, std::string *s); // Remove a snapshot from this pool int snap_remove(const char *snapname); int snap_list(std::vector *snaps); int snap_rollback(const std::string& oid, const char *snapname); // Deprecated name kept for backward compatibility - same as snap_rollback() int rollback(const std::string& oid, const char *snapname) __attribute__ ((deprecated)); int selfmanaged_snap_create(uint64_t *snapid); void aio_selfmanaged_snap_create(uint64_t *snapid, AioCompletion *c); int selfmanaged_snap_remove(uint64_t snapid); void aio_selfmanaged_snap_remove(uint64_t snapid, AioCompletion *c); int selfmanaged_snap_rollback(const std::string& oid, uint64_t snapid); // Advisory locking on rados objects. int lock_exclusive(const std::string &oid, const std::string &name, const std::string &cookie, const std::string &description, struct timeval * duration, uint8_t flags); int lock_shared(const std::string &oid, const std::string &name, const std::string &cookie, const std::string &tag, const std::string &description, struct timeval * duration, uint8_t flags); int unlock(const std::string &oid, const std::string &name, const std::string &cookie); int break_lock(const std::string &oid, const std::string &name, const std::string &client, const std::string &cookie); int list_lockers(const std::string &oid, const std::string &name, int *exclusive, std::string *tag, std::list *lockers); /// Start enumerating objects for a pool NObjectIterator nobjects_begin(); NObjectIterator nobjects_begin(const bufferlist &filter); /// Start enumerating objects for a pool starting from a hash position NObjectIterator nobjects_begin(uint32_t start_hash_position); NObjectIterator nobjects_begin(uint32_t start_hash_position, const bufferlist &filter); /// Start 
enumerating objects for a pool starting from cursor NObjectIterator nobjects_begin(const librados::ObjectCursor& cursor); NObjectIterator nobjects_begin(const librados::ObjectCursor& cursor, const bufferlist &filter); /// Iterator indicating the end of a pool const NObjectIterator& nobjects_end() const; /// Get cursor for pool beginning ObjectCursor object_list_begin(); /// Get cursor for pool end ObjectCursor object_list_end(); /// Check whether a cursor is at the end of a pool bool object_list_is_end(const ObjectCursor &oc); /// List some objects between two cursors int object_list(const ObjectCursor &start, const ObjectCursor &finish, const size_t result_count, const bufferlist &filter, std::vector *result, ObjectCursor *next); /// Generate cursors that include the N out of Mth slice of the pool void object_list_slice( const ObjectCursor start, const ObjectCursor finish, const size_t n, const size_t m, ObjectCursor *split_start, ObjectCursor *split_finish); /** * List available hit set objects * * @param uint32_t [in] hash position to query * @param c [in] completion * @param pls [out] list of available intervals */ int hit_set_list(uint32_t hash, AioCompletion *c, std::list< std::pair > *pls); /** * Retrieve hit set for a given hash, and time * * @param hash [in] hash position * @param c [in] completion * @param stamp [in] time interval that falls within the hit set's interval * @param pbl [out] buffer to store the result in */ int hit_set_get(uint32_t hash, AioCompletion *c, time_t stamp, bufferlist *pbl); uint64_t get_last_version(); int aio_read(const std::string& oid, AioCompletion *c, bufferlist *pbl, size_t len, uint64_t off); /** * Asynchronously read from an object at a particular snapshot * * This is the same as normal aio_read, except that it chooses * the snapshot to read from from its arguments instead of the * internal IoCtx state. * * The return value of the completion will be number of bytes read on * success, negative error code on failure. 
* * @param oid the name of the object to read from * @param c what to do when the read is complete * @param pbl where to store the results * @param len the number of bytes to read * @param off the offset to start reading from in the object * @param snapid the id of the snapshot to read from * @returns 0 on success, negative error code on failure */ int aio_read(const std::string& oid, AioCompletion *c, bufferlist *pbl, size_t len, uint64_t off, uint64_t snapid); int aio_sparse_read(const std::string& oid, AioCompletion *c, std::map *m, bufferlist *data_bl, size_t len, uint64_t off); /** * Asynchronously read existing extents from an object at a * particular snapshot * * This is the same as normal aio_sparse_read, except that it chooses * the snapshot to read from from its arguments instead of the * internal IoCtx state. * * m will be filled in with a map of extents in the object, * mapping offsets to lengths (in bytes) within the range * requested. The data for all of the extents are stored * back-to-back in offset order in data_bl. 
* * @param oid the name of the object to read from * @param c what to do when the read is complete * @param m where to store the map of extents * @param data_bl where to store the data * @param len the number of bytes to read * @param off the offset to start reading from in the object * @param snapid the id of the snapshot to read from * @returns 0 on success, negative error code on failure */ int aio_sparse_read(const std::string& oid, AioCompletion *c, std::map *m, bufferlist *data_bl, size_t len, uint64_t off, uint64_t snapid); /** * Asynchronously compare an on-disk object range with a buffer * * @param oid the name of the object to read from * @param c what to do when the read is complete * @param off object byte offset at which to start the comparison * @param cmp_bl buffer containing bytes to be compared with object contents * @returns 0 on success, negative error code on failure, * (-MAX_ERRNO - mismatch_off) on mismatch */ int aio_cmpext(const std::string& oid, librados::AioCompletion *c, uint64_t off, bufferlist& cmp_bl); int aio_write(const std::string& oid, AioCompletion *c, const bufferlist& bl, size_t len, uint64_t off); int aio_append(const std::string& oid, AioCompletion *c, const bufferlist& bl, size_t len); int aio_write_full(const std::string& oid, AioCompletion *c, const bufferlist& bl); int aio_writesame(const std::string& oid, AioCompletion *c, const bufferlist& bl, size_t write_len, uint64_t off); /** * Asychronously remove an object * * Queues the remove and returns. * * The return value of the completion will be 0 on success, negative * error code on failure. * * @param oid the name of the object * @param c what to do when the remove is safe and complete * @returns 0 on success, -EROFS if the io context specifies a snap_seq * other than SNAP_HEAD */ int aio_remove(const std::string& oid, AioCompletion *c); int aio_remove(const std::string& oid, AioCompletion *c, int flags); /** * Wait for all currently pending aio writes to be safe. 
* * @returns 0 on success, negative error code on failure */ int aio_flush(); /** * Schedule a callback for when all currently pending * aio writes are safe. This is a non-blocking version of * aio_flush(). * * @param c what to do when the writes are safe * @returns 0 on success, negative error code on failure */ int aio_flush_async(AioCompletion *c); int aio_getxattr(const std::string& oid, AioCompletion *c, const char *name, bufferlist& bl); int aio_getxattrs(const std::string& oid, AioCompletion *c, std::map& attrset); int aio_setxattr(const std::string& oid, AioCompletion *c, const char *name, bufferlist& bl); int aio_rmxattr(const std::string& oid, AioCompletion *c, const char *name); int aio_stat(const std::string& oid, AioCompletion *c, uint64_t *psize, time_t *pmtime); int aio_stat2(const std::string& oid, AioCompletion *c, uint64_t *psize, struct timespec *pts); /** * Cancel aio operation * * @param c completion handle * @returns 0 on success, negative error code on failure */ int aio_cancel(AioCompletion *c); int aio_exec(const std::string& oid, AioCompletion *c, const char *cls, const char *method, bufferlist& inbl, bufferlist *outbl); /* * asynchronous version of unlock */ int aio_unlock(const std::string &oid, const std::string &name, const std::string &cookie, AioCompletion *c); // compound object operations int operate(const std::string& oid, ObjectWriteOperation *op); int operate(const std::string& oid, ObjectReadOperation *op, bufferlist *pbl); int aio_operate(const std::string& oid, AioCompletion *c, ObjectWriteOperation *op); int aio_operate(const std::string& oid, AioCompletion *c, ObjectWriteOperation *op, int flags); /** * Schedule an async write operation with explicit snapshot parameters * * This is the same as the first aio_operate(), except that it * gets the snapshot context from its arguments instead of the * IoCtx internal state. 
*
     * @param oid the object to operate on
     * @param c what to do when the operation is complete and safe
     * @param op which operations to perform
     * @param seq latest selfmanaged snapshot sequence number for this object
     * @param snaps currently existing selfmanaged snapshot ids for this object
     * @returns 0 on success, negative error code on failure
     */
    // NOTE(review): `std::vector` is spelled without template arguments in the
    // two declarations below; confirm the intended element type for snaps.
    int aio_operate(const std::string& oid, AioCompletion *c,
                    ObjectWriteOperation *op, snap_t seq,
                    std::vector& snaps);
    // Same as above with an additional blkin trace handle.
    int aio_operate(const std::string& oid, AioCompletion *c,
                    ObjectWriteOperation *op, snap_t seq,
                    std::vector& snaps,
                    const blkin_trace_info *trace_info);

    // Read-operation overloads; pbl receives the result buffer.
    int aio_operate(const std::string& oid, AioCompletion *c,
                    ObjectReadOperation *op, bufferlist *pbl);

    // Deprecated overload taking an explicit snapid.
    // NOTE(review): presumably superseded by the flags-only overload below --
    // confirm.
    int aio_operate(const std::string& oid, AioCompletion *c,
                    ObjectReadOperation *op, snap_t snapid, int flags,
                    bufferlist *pbl)
      __attribute__ ((deprecated));

    int aio_operate(const std::string& oid, AioCompletion *c,
                    ObjectReadOperation *op, int flags,
                    bufferlist *pbl);
    // Same as above with an additional blkin trace handle.
    int aio_operate(const std::string& oid, AioCompletion *c,
                    ObjectReadOperation *op, int flags,
                    bufferlist *pbl, const blkin_trace_info *trace_info);

    // watch/notify
    // The watch calls return the watch handle through *handle; the same handle
    // type is what unwatch2()/aio_unwatch() take.
    int watch2(const std::string& o, uint64_t *handle,
               librados::WatchCtx2 *ctx);
    int watch3(const std::string& o, uint64_t *handle,
               librados::WatchCtx2 *ctx, uint32_t timeout);
    int aio_watch(const std::string& o, AioCompletion *c, uint64_t *handle,
                  librados::WatchCtx2 *ctx);
    int aio_watch2(const std::string& o, AioCompletion *c, uint64_t *handle,
                   librados::WatchCtx2 *ctx, uint32_t timeout);
    int unwatch2(uint64_t handle);
    int aio_unwatch(uint64_t handle, AioCompletion *c);

    /**
     * Send a notify event to watchers
     *
     * Upon completion the pbl bufferlist reply payload will be
     * encoded like so:
     *
     *    le32 num_acks
     *    {
     *      le64 gid     global id for the client (for client.1234 that's 1234)
     *      le64 cookie  cookie for the client
     *      le32 buflen  length of reply message buffer
     *      u8 * buflen  payload
     *    } * num_acks
     *    le32 num_timeouts
     *    {
     *      le64 gid     global id for the client
     *      le64 cookie  cookie for the client
     *    } * num_timeouts
     *
     *
     */
    int notify2(const std::string& o,   ///< object
                bufferlist& bl,         ///< optional broadcast payload
                uint64_t timeout_ms,    ///< timeout (in ms)
                bufferlist *pbl);       ///< reply buffer
    int aio_notify(const std::string& o,   ///< object
                   AioCompletion *c,       ///< completion when notify completes
                   bufferlist& bl,         ///< optional broadcast payload
                   uint64_t timeout_ms,    ///< timeout (in ms)
                   bufferlist *pbl);       ///< reply buffer

    // NOTE(review): `std::list` is spelled without template arguments here;
    // confirm the intended watcher element type.
    int list_watchers(const std::string& o, std::list *out_watchers);
    int list_snaps(const std::string& o, snap_set_t *out_snaps);
    void set_notify_timeout(uint32_t timeout);

    /// acknowledge a notify we received.
    void notify_ack(const std::string& o, ///< watched object
                    uint64_t notify_id,   ///< notify id
                    uint64_t cookie,      ///< our watch handle
                    bufferlist& bl);      ///< optional reply payload

    /***
     * check on watch validity
     *
     * Check if a watch is valid.  If so, return the number of
     * milliseconds since we last confirmed its liveness.  If there is
     * a known error, return it.
     *
     * If there is an error, the watch is no longer valid, and should
     * be destroyed with unwatch().  If the user is still interested
     * in the object, a new watch should be created with watch().
     *
     * NOTE(review): watch()/unwatch() are marked deprecated below;
     * watch2()/unwatch2() are presumably the calls intended here -- confirm.
     *
     * @param cookie watch handle
     * @returns ms since last confirmed valid, or error
     */
    int watch_check(uint64_t cookie);

    // old, deprecated versions
    int watch(const std::string& o, uint64_t ver, uint64_t *cookie,
              librados::WatchCtx *ctx) __attribute__ ((deprecated));
    int notify(const std::string& o, uint64_t ver, bufferlist& bl)
      __attribute__ ((deprecated));
    int unwatch(const std::string& o, uint64_t cookie)
      __attribute__ ((deprecated));

    /**
     * Set allocation hint for an object
     *
     * This is an advisory operation, it will always succeed (as if it
     * was submitted with an OP_FAILOK flag set) and is not guaranteed
     * to do anything on the backend.
*
     * @param o the name of the object
     * @param expected_object_size expected size of the object, in bytes
     * @param expected_write_size expected size of writes to the object, in bytes
     * @returns 0 on success, negative error code on failure
     */
    int set_alloc_hint(const std::string& o,
                       uint64_t expected_object_size,
                       uint64_t expected_write_size);
    // Same as above with an additional flags argument.
    // NOTE(review): the accepted flag values are not documented here -- confirm.
    int set_alloc_hint2(const std::string& o,
                        uint64_t expected_object_size,
                        uint64_t expected_write_size,
                        uint32_t flags);

    // assert version for next sync operations
    void set_assert_version(uint64_t ver);

    /**
     * Pin/unpin an object in cache tier
     *
     * @param o the name of the object
     * @returns 0 on success, negative error code on failure
     */
    int cache_pin(const std::string& o);
    int cache_unpin(const std::string& o);

    std::string get_pool_name() const;

    void locator_set_key(const std::string& key);
    void set_namespace(const std::string& nspace);

    int64_t get_id();

    // deprecated versions
    uint32_t get_object_hash_position(const std::string& oid)
      __attribute__ ((deprecated));
    uint32_t get_object_pg_hash_position(const std::string& oid)
      __attribute__ ((deprecated));

    // Non-deprecated forms: the position is written through the out-pointer
    // and the int return is left for the status.
    // NOTE(review): presumably 0 / negative error code like the rest of this
    // class -- confirm.
    int get_object_hash_position2(const std::string& oid, uint32_t *hash_position);
    int get_object_pg_hash_position2(const std::string& oid, uint32_t *pg_hash_position);

    config_t cct();

    void set_osdmap_full_try();
    void unset_osdmap_full_try();

    // Pool application metadata.
    // NOTE(review): `std::set` (application_list) and `std::map`
    // (application_metadata_list) are spelled without template arguments;
    // confirm the intended element types.
    int application_enable(const std::string& app_name, bool force);
    int application_enable_async(const std::string& app_name,
                                 bool force, PoolAsyncCompletion *c);
    int application_list(std::set *app_names);
    int application_metadata_get(const std::string& app_name,
                                 const std::string &key,
                                 std::string *value);
    int application_metadata_set(const std::string& app_name,
                                 const std::string &key,
                                 const std::string& value);
    int application_metadata_remove(const std::string& app_name,
                                    const std::string &key);
    int application_metadata_list(const std::string& app_name,
                                  std::map *values);

  private:
    /* You can only get IoCtx instances from Rados */
    IoCtx(IoCtxImpl *io_ctx_impl_);

    friend class Rados; // Only Rados can use our private constructor to create IoCtxes.
    friend class libradosstriper::RadosStriper; // Striper needs to see our IoCtxImpl
    friend class ObjectWriteOperation;  // copy_from needs to see our IoCtxImpl

    IoCtxImpl *io_ctx_impl;
  };

  struct PlacementGroupImpl;

  /**
   * Placement group handle; its state is held behind the impl pointer.
   *
   * NOTE(review): `std::unique_ptr` is spelled without a template argument
   * below; presumably PlacementGroupImpl (forward-declared just above) --
   * confirm.
   */
  struct CEPH_RADOS_API PlacementGroup {
    PlacementGroup();
    PlacementGroup(const PlacementGroup&);
    ~PlacementGroup();
    // NOTE(review): presumably parses a textual PG id and returns false on
    // malformed input -- confirm against the implementation.
    bool parse(const char*);
    std::unique_ptr impl;
  };

  CEPH_RADOS_API std::ostream& operator<<(std::ostream&, const PlacementGroup&);

  /**
   * Cluster-level handle. Declares configuration (conf_*), connection
   * (connect/shutdown), pool management (pool_*), command (mon_/mgr_/osd_/
   * pg_command) and IoCtx creation (ioctx_create*) entry points.
   */
  class CEPH_RADOS_API Rados
  {
  public:
    static void version(int *major, int *minor, int *extra);

    Rados();
    explicit Rados(IoCtx& ioctx);
    ~Rados();

    int init(const char * const id);
    int init2(const char * const name, const char * const clustername,
              uint64_t flags);
    int init_with_context(config_t cct_);
    config_t cct();
    int connect();
    void shutdown();
    int watch_flush();
    int aio_watch_flush(AioCompletion*);
    int conf_read_file(const char * const path) const;
    int conf_parse_argv(int argc, const char ** argv) const;
    int conf_parse_argv_remainder(int argc, const char ** argv,
                                  const char ** remargv) const;
    int conf_parse_env(const char *env) const;
    int conf_set(const char *option, const char *value);
    int conf_get(const char *option, std::string &val);

    // NOTE(review): `std::map` is spelled without template arguments in the
    // two service_daemon_* declarations; confirm the intended key/value types.
    int service_daemon_register(
      const std::string& service,  ///< service name (e.g., 'rgw')
      const std::string& name,     ///< daemon name (e.g., 'gwfoo')
      const std::map& metadata); ///< static metadata about daemon
    int service_daemon_update_status(
      const std::map& status);

    int pool_create(const char *name);
    int pool_create(const char *name, uint64_t auid);
    int pool_create(const char *name, uint64_t auid, uint8_t crush_rule);
    int pool_create_async(const char *name, PoolAsyncCompletion *c);
    int pool_create_async(const char *name, uint64_t auid, PoolAsyncCompletion *c);
    int pool_create_async(const char *name, uint64_t auid, uint8_t crush_rule, PoolAsyncCompletion *c);
    int pool_get_base_tier(int64_t pool, int64_t* base_tier);
    int pool_delete(const char *name);
    int pool_delete_async(const char *name, PoolAsyncCompletion *c);
    int64_t pool_lookup(const char *name);
    int pool_reverse_lookup(int64_t id, std::string *name);

    uint64_t get_instance_id();

    // Command entry points: cmd is the command text, inbl the input payload,
    // outbl / outs the outputs.
    // NOTE(review): the expected format of cmd is not documented here --
    // confirm.
    int mon_command(std::string cmd, const bufferlist& inbl,
                    bufferlist *outbl, std::string *outs);
    int mgr_command(std::string cmd, const bufferlist& inbl,
                    bufferlist *outbl, std::string *outs);
    int osd_command(int osdid, std::string cmd, const bufferlist& inbl,
                    bufferlist *outbl, std::string *outs);
    int pg_command(const char *pgstr, std::string cmd, const bufferlist& inbl,
                   bufferlist *outbl, std::string *outs);

    int ioctx_create(const char *name, IoCtx &pioctx);
    int ioctx_create2(int64_t pool_id, IoCtx &pioctx);

    // Features useful for test cases
    void test_blacklist_self(bool set);

    /* pool info */
    // NOTE(review): the `std::list` parameters below are spelled without
    // (complete) template arguments; confirm the intended element types.
    int pool_list(std::list& v);
    int pool_list2(std::list >& v);
    int get_pool_stats(std::list& v,
                       stats_map& result);
    /// deprecated; use simpler form.  categories no longer supported.
int get_pool_stats(std::list& v,
                       std::map& stats);
    /// deprecated; categories no longer supported
    int get_pool_stats(std::list& v,
                       std::string& category,
                       std::map& stats);
    /// check if pool has selfmanaged snaps
    bool get_pool_is_selfmanaged_snaps_mode(const std::string& poolname);

    int cluster_stat(cluster_stat_t& result);
    int cluster_fsid(std::string *fsid);

    /**
     * List inconsistent placement groups in the given pool
     *
     * @param pool_id the pool id
     * @param pgs [out] the inconsistent PGs
     */
    // NOTE(review): `std::vector` is spelled without template arguments in
    // this declaration and in get_inconsistent_objects(); confirm the intended
    // element types.
    int get_inconsistent_pgs(int64_t pool_id,
                             std::vector* pgs);
    /**
     * List the inconsistent objects found in a given PG by last scrub
     *
     * @param pg the placement group returned by @c pg_list()
     * @param start_after the first returned @c objects
     * @param max_return the max number of the returned @c objects
     * @param c what to do when the operation is complete and safe
     * @param objects [out] the objects where inconsistencies are found
     * @param interval [in,out] an epoch indicating current interval
     * @returns if a non-zero @c interval is specified, will return -EAGAIN if
     *          the current interval begin epoch is different.
     */
    int get_inconsistent_objects(const PlacementGroup& pg,
                                 const object_id_t &start_after,
                                 unsigned max_return,
                                 AioCompletion *c,
                                 std::vector* objects,
                                 uint32_t* interval);
    /**
     * List the inconsistent snapsets found in a given PG by last scrub
     *
     * @param pg the placement group returned by @c pg_list()
     * @param start_after the first returned @c objects
     * @param max_return the max number of the returned @c objects
     * @param c what to do when the operation is complete and safe
     * @param snapset [out] the snapsets where inconsistencies are found
     * @param interval [in,out] an epoch indicating current interval
     * @returns if a non-zero @c interval is specified, will return -EAGAIN if
     *          the current interval begin epoch is different.
*/
    // NOTE(review): `std::vector` is spelled without template arguments in
    // this declaration; confirm the intended element type for snapset.
    int get_inconsistent_snapsets(const PlacementGroup& pg,
                                  const object_id_t &start_after,
                                  unsigned max_return,
                                  AioCompletion *c,
                                  std::vector* snapset,
                                  uint32_t* interval);

    /// get/wait for the most recent osdmap
    int wait_for_latest_osdmap();

    // NOTE(review): undocumented here; presumably blacklists client_address
    // for expire_seconds seconds -- confirm against the implementation.
    int blacklist_add(const std::string& client_address,
                      uint32_t expire_seconds);

    /*
     * pool aio
     *
     * It is up to the caller to release the completion handler, even if the pool_create_async()
     * and/or pool_delete_async() fails and does not send the async request
     */
    static PoolAsyncCompletion *pool_async_create_completion();

    // -- aio --
    static AioCompletion *aio_create_completion();
    // Variant taking a user argument (cb_arg) and two callbacks of type
    // callback_t.
    // NOTE(review): when cb_complete and cb_safe fire is not documented here
    // -- confirm.
    static AioCompletion *aio_create_completion(void *cb_arg, callback_t cb_complete,
                                                callback_t cb_safe);

    friend std::ostream& operator<<(std::ostream &oss, const Rados& r);
  private:
    // We don't allow assignment or copying
    // (both members are declared private so that outside code cannot copy a
    // Rados instance)
    Rados(const Rados& rhs);
    const Rados& operator=(const Rados& rhs);
    RadosClient *client;
  };
}

#endif