X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Finclude%2Frados%2Flibrados.h;fp=src%2Fceph%2Fsrc%2Finclude%2Frados%2Flibrados.h;h=876bf8516ee6fdc51683a47cdeb4a34394d965fc;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/include/rados/librados.h b/src/ceph/src/include/rados/librados.h new file mode 100644 index 0000000..876bf85 --- /dev/null +++ b/src/ceph/src/include/rados/librados.h @@ -0,0 +1,3853 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_LIBRADOS_H +#define CEPH_LIBRADOS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif +#include +#include +#include "rados_types.h" + +#include + +#ifndef CEPH_OSD_TMAP_SET +/* These are also defined in rados.h and objclass.h. Keep them in sync! */ +#define CEPH_OSD_TMAP_HDR 'h' +#define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' +#define CEPH_OSD_TMAP_RM 'r' +#endif + +#define LIBRADOS_VER_MAJOR 0 +#define LIBRADOS_VER_MINOR 69 +#define LIBRADOS_VER_EXTRA 1 + +#define LIBRADOS_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra) + +#define LIBRADOS_VERSION_CODE LIBRADOS_VERSION(LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA) + +#define LIBRADOS_SUPPORTS_WATCH 1 +#define LIBRADOS_SUPPORTS_SERVICES 1 + +/* RADOS lock flags + * They are also defined in cls_lock_types.h. Keep them in sync! + */ +#define LIBRADOS_LOCK_FLAG_RENEW 0x1 + +/* + * Constants for rados_write_op_create(). + */ +#define LIBRADOS_CREATE_EXCLUSIVE 1 +#define LIBRADOS_CREATE_IDEMPOTENT 0 + +/* + * Flags that can be set on a per-op basis via + * rados_read_op_set_flags() and rados_write_op_set_flags(). + */ +enum { + // fail a create operation if the object already exists + LIBRADOS_OP_FLAG_EXCL = 0x1, + // allow the transaction to succeed even if the flagged op fails + LIBRADOS_OP_FLAG_FAILOK = 0x2, + // indicate read/write op random + LIBRADOS_OP_FLAG_FADVISE_RANDOM = 0x4, + // indicate read/write op sequential + LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, + // indicate read/write data will be accessed in the near future (by someone) + LIBRADOS_OP_FLAG_FADVISE_WILLNEED = 0x10, + // indicate read/write data will not accessed in the near future (by anyone) + LIBRADOS_OP_FLAG_FADVISE_DONTNEED = 0x20, + // indicate read/write data will not accessed again (by *this* client) + LIBRADOS_OP_FLAG_FADVISE_NOCACHE = 0x40, +}; + +#if __GNUC__ >= 4 + #define CEPH_RADOS_API __attribute__ ((visibility ("default"))) +#else + #define CEPH_RADOS_API +#endif + +/** + * @name xattr comparison operations + * Operators for comparing xattrs on objects, and aborting the + * rados_read_op or rados_write_op transaction if the comparison + * fails. + * + * @{ + */ +enum { + LIBRADOS_CMPXATTR_OP_EQ = 1, + LIBRADOS_CMPXATTR_OP_NE = 2, + LIBRADOS_CMPXATTR_OP_GT = 3, + LIBRADOS_CMPXATTR_OP_GTE = 4, + LIBRADOS_CMPXATTR_OP_LT = 5, + LIBRADOS_CMPXATTR_OP_LTE = 6 +}; +/** @} */ + +/** + * @name Operation Flags + * Flags for rados_read_op_opeprate(), rados_write_op_operate(), + * rados_aio_read_op_operate(), and rados_aio_write_op_operate(). + * See librados.hpp for details. + * @{ + */ +enum { + LIBRADOS_OPERATION_NOFLAG = 0, + LIBRADOS_OPERATION_BALANCE_READS = 1, + LIBRADOS_OPERATION_LOCALIZE_READS = 2, + LIBRADOS_OPERATION_ORDER_READS_WRITES = 4, + LIBRADOS_OPERATION_IGNORE_CACHE = 8, + LIBRADOS_OPERATION_SKIPRWLOCKS = 16, + LIBRADOS_OPERATION_IGNORE_OVERLAY = 32, + /* send requests to cluster despite the cluster or pool being marked + full; ops will either succeed (e.g., delete) or return EDQUOT or + ENOSPC. */ + LIBRADOS_OPERATION_FULL_TRY = 64, + /* + * Mainly for delete op + */ + LIBRADOS_OPERATION_FULL_FORCE = 128, + LIBRADOS_OPERATION_IGNORE_REDIRECT = 256, +}; +/** @} */ + +/** + * @name Alloc hint flags + * Flags for rados_write_op_alloc_hint2() and rados_set_alloc_hint2() + * indicating future IO patterns. + * @{ + */ +enum { + LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1, + LIBRADOS_ALLOC_HINT_FLAG_RANDOM_WRITE = 2, + LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_READ = 4, + LIBRADOS_ALLOC_HINT_FLAG_RANDOM_READ = 8, + LIBRADOS_ALLOC_HINT_FLAG_APPEND_ONLY = 16, + LIBRADOS_ALLOC_HINT_FLAG_IMMUTABLE = 32, + LIBRADOS_ALLOC_HINT_FLAG_SHORTLIVED = 64, + LIBRADOS_ALLOC_HINT_FLAG_LONGLIVED = 128, + LIBRADOS_ALLOC_HINT_FLAG_COMPRESSIBLE = 256, + LIBRADOS_ALLOC_HINT_FLAG_INCOMPRESSIBLE = 512, +}; +/** @} */ + +typedef enum { + LIBRADOS_CHECKSUM_TYPE_XXHASH32 = 0, + LIBRADOS_CHECKSUM_TYPE_XXHASH64 = 1, + LIBRADOS_CHECKSUM_TYPE_CRC32C = 2 +} rados_checksum_type_t; + +/* + * snap id contants + */ +#define LIBRADOS_SNAP_HEAD ((uint64_t)(-2)) +#define LIBRADOS_SNAP_DIR ((uint64_t)(-1)) + +/** + * @typedef rados_t + * + * A handle for interacting with a RADOS cluster. It encapsulates all + * RADOS client configuration, including username, key for + * authentication, logging, and debugging. Talking different clusters + * -- or to the same cluster with different users -- requires + * different cluster handles. + */ +#ifndef VOIDPTR_RADOS_T +#define VOIDPTR_RADOS_T +typedef void *rados_t; +#endif //VOIDPTR_RADOS_T + +/** + * @typedef rados_config_t + * + * A handle for the ceph configuration context for the rados_t cluster + * instance. This can be used to share configuration context/state + * (e.g., logging configuration) between librados instance. + * + * @warning The config context does not have independent reference + * counting. As such, a rados_config_t handle retrieved from a given + * rados_t is only valid as long as that rados_t. + */ +typedef void *rados_config_t; + +/** + * @typedef rados_ioctx_t + * + * An io context encapsulates a few settings for all I/O operations + * done on it: + * - pool - set when the io context is created (see rados_ioctx_create()) + * - snapshot context for writes (see + * rados_ioctx_selfmanaged_snap_set_write_ctx()) + * - snapshot id to read from (see rados_ioctx_snap_set_read()) + * - object locator for all single-object operations (see + * rados_ioctx_locator_set_key()) + * - namespace for all single-object operations (see + * rados_ioctx_set_namespace()). Set to LIBRADOS_ALL_NSPACES + * before rados_nobjects_list_open() will list all objects in all + * namespaces. + * + * @warning Changing any of these settings is not thread-safe - + * librados users must synchronize any of these changes on their own, + * or use separate io contexts for each thread + */ +typedef void *rados_ioctx_t; + +/** + * @typedef rados_list_ctx_t + * + * An iterator for listing the objects in a pool. + * Used with rados_nobjects_list_open(), + * rados_nobjects_list_next(), and + * rados_nobjects_list_close(). + */ +typedef void *rados_list_ctx_t; + +/** + * @typedef rados_object_list_cursor + * + * The cursor used with rados_enumerate_objects + * and accompanying methods. + */ +typedef void * rados_object_list_cursor; + +typedef struct rados_object_list_item { + size_t oid_length; + char *oid; + + size_t nspace_length; + char *nspace; + + size_t locator_length; + char *locator; +} rados_object_list_item; + +/** + * @typedef rados_snap_t + * The id of a snapshot. + */ +typedef uint64_t rados_snap_t; + +/** + * @typedef rados_xattrs_iter_t + * An iterator for listing extended attrbutes on an object. + * Used with rados_getxattrs(), rados_getxattrs_next(), and + * rados_getxattrs_end(). + */ +typedef void *rados_xattrs_iter_t; + +/** + * @typedef rados_omap_iter_t + * An iterator for listing omap key/value pairs on an object. + * Used with rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals(), + * rados_read_op_omap_get_vals_by_keys(), rados_omap_get_next(), and + * rados_omap_get_end(). + */ +typedef void *rados_omap_iter_t; + +/** + * @struct rados_pool_stat_t + * Usage information for a pool. + */ +struct rados_pool_stat_t { + /// space used in bytes + uint64_t num_bytes; + /// space used in KB + uint64_t num_kb; + /// number of objects in the pool + uint64_t num_objects; + /// number of clones of objects + uint64_t num_object_clones; + /// num_objects * num_replicas + uint64_t num_object_copies; + uint64_t num_objects_missing_on_primary; + /// number of objects found on no OSDs + uint64_t num_objects_unfound; + /// number of objects replicated fewer times than they should be + /// (but found on at least one OSD) + uint64_t num_objects_degraded; + uint64_t num_rd; + uint64_t num_rd_kb; + uint64_t num_wr; + uint64_t num_wr_kb; +}; + +/** + * @struct rados_cluster_stat_t + * Cluster-wide usage information + */ +struct rados_cluster_stat_t { + uint64_t kb, kb_used, kb_avail; + uint64_t num_objects; +}; + +/** + * @typedef rados_write_op_t + * + * An object write operation stores a number of operations which can be + * executed atomically. For usage, see: + * - Creation and deletion: rados_create_write_op() rados_release_write_op() + * - Extended attribute manipulation: rados_write_op_cmpxattr() + * rados_write_op_cmpxattr(), rados_write_op_setxattr(), + * rados_write_op_rmxattr() + * - Object map key/value pairs: rados_write_op_omap_set(), + * rados_write_op_omap_rm_keys(), rados_write_op_omap_clear(), + * rados_write_op_omap_cmp() + * - Object properties: rados_write_op_assert_exists(), + * rados_write_op_assert_version() + * - Creating objects: rados_write_op_create() + * - IO on objects: rados_write_op_append(), rados_write_op_write(), rados_write_op_zero + * rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove, + * rados_write_op_truncate(), rados_write_op_zero(), rados_write_op_cmpext() + * - Hints: rados_write_op_set_alloc_hint() + * - Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate() + */ +typedef void *rados_write_op_t; + +/** + * @typedef rados_read_op_t + * + * An object read operation stores a number of operations which can be + * executed atomically. For usage, see: + * - Creation and deletion: rados_create_read_op() rados_release_read_op() + * - Extended attribute manipulation: rados_read_op_cmpxattr(), + * rados_read_op_getxattr(), rados_read_op_getxattrs() + * - Object map key/value pairs: rados_read_op_omap_get_vals(), + * rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals_by_keys(), + * rados_read_op_omap_cmp() + * - Object properties: rados_read_op_stat(), rados_read_op_assert_exists(), + * rados_read_op_assert_version() + * - IO on objects: rados_read_op_read(), rados_read_op_checksum(), + * rados_read_op_cmpext() + * - Custom operations: rados_read_op_exec(), rados_read_op_exec_user_buf() + * - Request properties: rados_read_op_set_flags() + * - Performing the operation: rados_read_op_operate(), + * rados_aio_read_op_operate() + */ +typedef void *rados_read_op_t; + +/** + * @typedef rados_completion_t + * Represents the state of an asynchronous operation - it contains the + * return value once the operation completes, and can be used to block + * until the operation is complete or safe. + */ +typedef void *rados_completion_t; + +/** + * @struct blkin_trace_info + * blkin trace information for Zipkin tracing + */ +struct blkin_trace_info; + +/** + * Get the version of librados. + * + * The version number is major.minor.extra. Note that this is + * unrelated to the Ceph version number. + * + * TODO: define version semantics, i.e.: + * - incrementing major is for backwards-incompatible changes + * - incrementing minor is for backwards-compatible changes + * - incrementing extra is for bug fixes + * + * @param major where to store the major version number + * @param minor where to store the minor version number + * @param extra where to store the extra version number + */ +CEPH_RADOS_API void rados_version(int *major, int *minor, int *extra); + +/** + * @name Setup and Teardown + * These are the first and last functions to that should be called + * when using librados. + * + * @{ + */ + +/** + * Create a handle for communicating with a RADOS cluster. + * + * Ceph environment variables are read when this is called, so if + * $CEPH_ARGS specifies everything you need to connect, no further + * configuration is necessary. + * + * @param cluster where to store the handle + * @param id the user to connect as (i.e. admin, not client.admin) + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_create(rados_t *cluster, const char * const id); + +/** + * Extended version of rados_create. + * + * Like rados_create, but + * 1) don't assume 'client\.'+id; allow full specification of name + * 2) allow specification of cluster name + * 3) flags for future expansion + */ +CEPH_RADOS_API int rados_create2(rados_t *pcluster, + const char *const clustername, + const char * const name, uint64_t flags); + +/** + * Initialize a cluster handle from an existing configuration. + * + * Share configuration state with another rados_t instance. + * + * @param cluster where to store the handle + * @param cct the existing configuration to use + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_create_with_context(rados_t *cluster, + rados_config_t cct); + +/** + * Ping the monitor with ID mon_id, storing the resulting reply in + * buf (if specified) with a maximum size of len. + * + * The result buffer is allocated on the heap; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can be NULL, in which case they are + * not filled in. + * + * @param cluster cluster handle + * @param[in] mon_id ID of the monitor to ping + * @param[out] outstr double pointer with the resulting reply + * @param[out] outstrlen pointer with the size of the reply in outstr + */ +CEPH_RADOS_API int rados_ping_monitor(rados_t cluster, const char *mon_id, + char **outstr, size_t *outstrlen); + +/** + * Connect to the cluster. + * + * @note BUG: Before calling this, calling a function that communicates with the + * cluster will crash. + * + * @pre The cluster handle is configured with at least a monitor + * address. If cephx is enabled, a client name and secret must also be + * set. + * + * @post If this succeeds, any function in librados may be used + * + * @param cluster The cluster to connect to. + * @returns 0 on sucess, negative error code on failure + */ +CEPH_RADOS_API int rados_connect(rados_t cluster); + +/** + * Disconnects from the cluster. + * + * For clean up, this is only necessary after rados_connect() has + * succeeded. + * + * @warning This does not guarantee any asynchronous writes have + * completed. To do that, you must call rados_aio_flush() on all open + * io contexts. + * + * @warning We implicitly call rados_watch_flush() on shutdown. If + * there are watches being used, this should be done explicitly before + * destroying the relevant IoCtx. We do it here as a safety measure. + * + * @post the cluster handle cannot be used again + * + * @param cluster the cluster to shutdown + */ +CEPH_RADOS_API void rados_shutdown(rados_t cluster); + +/** @} init */ + +/** + * @name Configuration + * These functions read and update Ceph configuration for a cluster + * handle. Any configuration changes must be done before connecting to + * the cluster. + * + * Options that librados users might want to set include: + * - mon_host + * - auth_supported + * - key, keyfile, or keyring when using cephx + * - log_file, log_to_stderr, err_to_stderr, and log_to_syslog + * - debug_rados, debug_objecter, debug_monc, debug_auth, or debug_ms + * + * See docs.ceph.com for information about available configuration options` + * + * @{ + */ + +/** + * Configure the cluster handle using a Ceph config file + * + * If path is NULL, the default locations are searched, and the first + * found is used. The locations are: + * - $CEPH_CONF (environment variable) + * - /etc/ceph/ceph.conf + * - ~/.ceph/config + * - ceph.conf (in the current working directory) + * + * @pre rados_connect() has not been called on the cluster handle + * + * @param cluster cluster handle to configure + * @param path path to a Ceph configuration file + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_conf_read_file(rados_t cluster, const char *path); + +/** + * Configure the cluster handle with command line arguments + * + * argv can contain any common Ceph command line option, including any + * configuration parameter prefixed by '--' and replacing spaces with + * dashes or underscores. For example, the following options are equivalent: + * - --mon-host 10.0.0.1:6789 + * - --mon_host 10.0.0.1:6789 + * - -m 10.0.0.1:6789 + * + * @pre rados_connect() has not been called on the cluster handle + * + * @param cluster cluster handle to configure + * @param argc number of arguments in argv + * @param argv arguments to parse + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_conf_parse_argv(rados_t cluster, int argc, + const char **argv); + + +/** + * Configure the cluster handle with command line arguments, returning + * any remainders. Same rados_conf_parse_argv, except for extra + * remargv argument to hold returns unrecognized arguments. + * + * @pre rados_connect() has not been called on the cluster handle + * + * @param cluster cluster handle to configure + * @param argc number of arguments in argv + * @param argv arguments to parse + * @param remargv char* array for returned unrecognized arguments + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_conf_parse_argv_remainder(rados_t cluster, int argc, + const char **argv, + const char **remargv); +/** + * Configure the cluster handle based on an environment variable + * + * The contents of the environment variable are parsed as if they were + * Ceph command line options. If var is NULL, the CEPH_ARGS + * environment variable is used. + * + * @pre rados_connect() has not been called on the cluster handle + * + * @note BUG: this is not threadsafe - it uses a static buffer + * + * @param cluster cluster handle to configure + * @param var name of the environment variable to read + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_conf_parse_env(rados_t cluster, const char *var); + +/** + * Set a configuration option + * + * @pre rados_connect() has not been called on the cluster handle + * + * @param cluster cluster handle to configure + * @param option option to set + * @param value value of the option + * @returns 0 on success, negative error code on failure + * @returns -ENOENT when the option is not a Ceph configuration option + */ +CEPH_RADOS_API int rados_conf_set(rados_t cluster, const char *option, + const char *value); + +/** + * Get the value of a configuration option + * + * @param cluster configuration to read + * @param option which option to read + * @param buf where to write the configuration value + * @param len the size of buf in bytes + * @returns 0 on success, negative error code on failure + * @returns -ENAMETOOLONG if the buffer is too short to contain the + * requested value + */ +CEPH_RADOS_API int rados_conf_get(rados_t cluster, const char *option, + char *buf, size_t len); + +/** @} config */ + +/** + * Read usage info about the cluster + * + * This tells you total space, space used, space available, and number + * of objects. These are not updated immediately when data is written, + * they are eventually consistent. + * + * @param cluster cluster to query + * @param result where to store the results + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_cluster_stat(rados_t cluster, + struct rados_cluster_stat_t *result); + +/** + * Get the fsid of the cluster as a hexadecimal string. + * + * The fsid is a unique id of an entire Ceph cluster. + * + * @param cluster where to get the fsid + * @param buf where to write the fsid + * @param len the size of buf in bytes (should be 37) + * @returns 0 on success, negative error code on failure + * @returns -ERANGE if the buffer is too short to contain the + * fsid + */ +CEPH_RADOS_API int rados_cluster_fsid(rados_t cluster, char *buf, size_t len); + +/** + * Get/wait for the most recent osdmap + * + * @param cluster the cluster to shutdown + * @returns 0 on sucess, negative error code on failure + */ +CEPH_RADOS_API int rados_wait_for_latest_osdmap(rados_t cluster); + +/** + * @name Pools + * + * RADOS pools are separate namespaces for objects. Pools may have + * different crush rules associated with them, so they could have + * differing replication levels or placement strategies. RADOS + * permissions are also tied to pools - users can have different read, + * write, and execute permissions on a per-pool basis. + * + * @{ + */ + +/** + * List pools + * + * Gets a list of pool names as NULL-terminated strings. The pool + * names will be placed in the supplied buffer one after another. + * After the last pool name, there will be two 0 bytes in a row. + * + * If len is too short to fit all the pool name entries we need, we will fill + * as much as we can. + * + * Buf may be null to determine the buffer size needed to list all pools. + * + * @param cluster cluster handle + * @param buf output buffer + * @param len output buffer length + * @returns length of the buffer we would need to list all pools + */ +CEPH_RADOS_API int rados_pool_list(rados_t cluster, char *buf, size_t len); + +/** + * List inconsistent placement groups of the given pool + * + * Gets a list of inconsistent placement groups as NULL-terminated strings. + * The placement group names will be placed in the supplied buffer one after + * another. After the last name, there will be two 0 types in a row. + * + * If len is too short to fit all the placement group entries we need, we will + * fill as much as we can. + * + * @param cluster cluster handle + * @param pool pool ID + * @param buf output buffer + * @param len output buffer length + * @returns length of the buffer we would need to list all pools + */ +CEPH_RADOS_API int rados_inconsistent_pg_list(rados_t cluster, int64_t pool, + char *buf, size_t len); + +/** + * Get a configuration handle for a rados cluster handle + * + * This handle is valid only as long as the cluster handle is valid. + * + * @param cluster cluster handle + * @returns config handle for this cluster + */ +CEPH_RADOS_API rados_config_t rados_cct(rados_t cluster); + +/** + * Get a global id for current instance + * + * This id is a unique representation of current connection to the cluster + * + * @param cluster cluster handle + * @returns instance global id + */ +CEPH_RADOS_API uint64_t rados_get_instance_id(rados_t cluster); + +/** + * Create an io context + * + * The io context allows you to perform operations within a particular + * pool. For more details see rados_ioctx_t. + * + * @param cluster which cluster the pool is in + * @param pool_name name of the pool + * @param ioctx where to store the io context + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_create(rados_t cluster, const char *pool_name, + rados_ioctx_t *ioctx); +CEPH_RADOS_API int rados_ioctx_create2(rados_t cluster, int64_t pool_id, + rados_ioctx_t *ioctx); + +/** + * The opposite of rados_ioctx_create + * + * This just tells librados that you no longer need to use the io context. + * It may not be freed immediately if there are pending asynchronous + * requests on it, but you should not use an io context again after + * calling this function on it. + * + * @warning This does not guarantee any asynchronous + * writes have completed. You must call rados_aio_flush() + * on the io context before destroying it to do that. + * + * @warning If this ioctx is used by rados_watch, the caller needs to + * be sure that all registered watches are disconnected via + * rados_unwatch() and that rados_watch_flush() is called. This + * ensures that a racing watch callback does not make use of a + * destroyed ioctx. + * + * @param io the io context to dispose of + */ +CEPH_RADOS_API void rados_ioctx_destroy(rados_ioctx_t io); + +/** + * Get configuration handle for a pool handle + * + * @param io pool handle + * @returns rados_config_t for this cluster + */ +CEPH_RADOS_API rados_config_t rados_ioctx_cct(rados_ioctx_t io); + +/** + * Get the cluster handle used by this rados_ioctx_t + * Note that this is a weak reference, and should not + * be destroyed via rados_shutdown(). + * + * @param io the io context + * @returns the cluster handle for this io context + */ +CEPH_RADOS_API rados_t rados_ioctx_get_cluster(rados_ioctx_t io); + +/** + * Get pool usage statistics + * + * Fills in a rados_pool_stat_t after querying the cluster. + * + * @param io determines which pool to query + * @param stats where to store the results + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_pool_stat(rados_ioctx_t io, + struct rados_pool_stat_t *stats); + +/** + * Get the id of a pool + * + * @param cluster which cluster the pool is in + * @param pool_name which pool to look up + * @returns id of the pool + * @returns -ENOENT if the pool is not found + */ +CEPH_RADOS_API int64_t rados_pool_lookup(rados_t cluster, + const char *pool_name); + +/** + * Get the name of a pool + * + * @param cluster which cluster the pool is in + * @param id the id of the pool + * @param buf where to store the pool name + * @param maxlen size of buffer where name will be stored + * @returns length of string stored, or -ERANGE if buffer too small + */ +CEPH_RADOS_API int rados_pool_reverse_lookup(rados_t cluster, int64_t id, + char *buf, size_t maxlen); + +/** + * Create a pool with default settings + * + * The default owner is the admin user (auid 0). + * The default crush rule is rule 0. + * + * @param cluster the cluster in which the pool will be created + * @param pool_name the name of the new pool + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_pool_create(rados_t cluster, const char *pool_name); + +/** + * Create a pool owned by a specific auid + * + * The auid is the authenticated user id to give ownership of the pool. + * TODO: document auid and the rest of the auth system + * + * @param cluster the cluster in which the pool will be created + * @param pool_name the name of the new pool + * @param auid the id of the owner of the new pool + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_pool_create_with_auid(rados_t cluster, + const char *pool_name, + uint64_t auid); + +/** + * Create a pool with a specific CRUSH rule + * + * @param cluster the cluster in which the pool will be created + * @param pool_name the name of the new pool + * @param crush_rule_num which rule to use for placement in the new pool1 + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_pool_create_with_crush_rule(rados_t cluster, + const char *pool_name, + uint8_t crush_rule_num); + +/** + * Create a pool with a specific CRUSH rule and auid + * + * This is a combination of rados_pool_create_with_crush_rule() and + * rados_pool_create_with_auid(). + * + * @param cluster the cluster in which the pool will be created + * @param pool_name the name of the new pool + * @param crush_rule_num which rule to use for placement in the new pool2 + * @param auid the id of the owner of the new pool + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_pool_create_with_all(rados_t cluster, + const char *pool_name, + uint64_t auid, + uint8_t crush_rule_num); + +/** + * Returns the pool that is the base tier for this pool. + * + * The return value is the ID of the pool that should be used to read from/write to. + * If tiering is not set up for the pool, returns \c pool. + * + * @param cluster the cluster the pool is in + * @param pool ID of the pool to query + * @param[out] base_tier base tier, or \c pool if tiering is not configured + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_pool_get_base_tier(rados_t cluster, int64_t pool, + int64_t* base_tier); + +/** + * Delete a pool and all data inside it + * + * The pool is removed from the cluster immediately, + * but the actual data is deleted in the background. + * + * @param cluster the cluster the pool is in + * @param pool_name which pool to delete + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_pool_delete(rados_t cluster, const char *pool_name); + +/** + * Attempt to change an io context's associated auid "owner" + * + * Requires that you have write permission on both the current and new + * auid. + * + * @param io reference to the pool to change. + * @param auid the auid you wish the io to have. + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_pool_set_auid(rados_ioctx_t io, uint64_t auid); + +/** + * Get the auid of a pool + * + * @param io pool to query + * @param auid where to store the auid + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_pool_get_auid(rados_ioctx_t io, uint64_t *auid); + +/* deprecated, use rados_ioctx_pool_requires_alignment2 instead */ +CEPH_RADOS_API int rados_ioctx_pool_requires_alignment(rados_ioctx_t io) + __attribute__((deprecated)); + +/** + * Test whether the specified pool requires alignment or not. + * + * @param io pool to query + * @param requires 1 if alignment is supported, 0 if not. + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io, + int *requires); + +/* deprecated, use rados_ioctx_pool_required_alignment2 instead */ +CEPH_RADOS_API uint64_t rados_ioctx_pool_required_alignment(rados_ioctx_t io) + __attribute__((deprecated)); + +/** + * Get the alignment flavor of a pool + * + * @param io pool to query + * @param alignment where to store the alignment flavor + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_pool_required_alignment2(rados_ioctx_t io, + uint64_t *alignment); + +/** + * Get the pool id of the io context + * + * @param io the io context to query + * @returns the id of the pool the io context uses + */ +CEPH_RADOS_API int64_t rados_ioctx_get_id(rados_ioctx_t io); + +/** + * Get the pool name of the io context + * + * @param io the io context to query + * @param buf pointer to buffer where name will be stored + * @param maxlen size of buffer where name will be stored + * @returns length of string stored, or -ERANGE if buffer too small + */ +CEPH_RADOS_API int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf, + unsigned maxlen); + +/** @} pools */ + +/** + * @name Object Locators + * + * @{ + */ + +/** + * Set the key for mapping objects to pgs within an io context. + * + * The key is used instead of the object name to determine which + * placement groups an object is put in. This affects all subsequent + * operations of the io context - until a different locator key is + * set, all objects in this io context will be placed in the same pg. + * + * @param io the io context to change + * @param key the key to use as the object locator, or NULL to discard + * any previously set key + */ +CEPH_RADOS_API void rados_ioctx_locator_set_key(rados_ioctx_t io, + const char *key); + +/** + * Set the namespace for objects within an io context + * + * The namespace specification further refines a pool into different + * domains. The mapping of objects to pgs is also based on this + * value. + * + * @param io the io context to change + * @param nspace the name to use as the namespace, or NULL use the + * default namespace + */ +CEPH_RADOS_API void rados_ioctx_set_namespace(rados_ioctx_t io, + const char *nspace); +/** @} obj_loc */ + +/** + * @name Listing Objects + * @{ + */ +/** + * Start listing objects in a pool + * + * @param io the pool to list from + * @param ctx the handle to store list context in + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_nobjects_list_open(rados_ioctx_t io, + rados_list_ctx_t *ctx); + +/** + * Return hash position of iterator, rounded to the current PG + * + * @param ctx iterator marking where you are in the listing + * @returns current hash position, rounded to the current pg + */ +CEPH_RADOS_API uint32_t rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx); + +/** + * Reposition object iterator to a different hash position + * + * @param ctx iterator marking where you are in the listing + * @param pos hash position to move to + * @returns actual (rounded) position we moved to + */ +CEPH_RADOS_API uint32_t rados_nobjects_list_seek(rados_list_ctx_t ctx, + uint32_t pos); + +/** + * Reposition object iterator to a different position + * + * @param ctx iterator marking where you are in the listing + * @param cursor position to move to + * @returns rounded position we moved to + */ +CEPH_RADOS_API uint32_t rados_nobjects_list_seek_cursor(rados_list_ctx_t ctx, + rados_object_list_cursor cursor); + +/** + * Reposition object iterator to a different position + * + * The returned handle must be released with rados_object_list_cursor_free(). + * + * @param ctx iterator marking where you are in the listing + * @param cursor where to store cursor + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_nobjects_list_get_cursor(rados_list_ctx_t ctx, + rados_object_list_cursor *cursor); + +/** + * Get the next object name and locator in the pool + * + * *entry and *key are valid until next call to rados_nobjects_list_* + * + * @param ctx iterator marking where you are in the listing + * @param entry where to store the name of the entry + * @param key where to store the object locator (set to NULL to ignore) + * @param nspace where to store the object namespace (set to NULL to ignore) + * @returns 0 on success, negative error code on failure + * @returns -ENOENT when there are no more objects to list + */ +CEPH_RADOS_API int rados_nobjects_list_next(rados_list_ctx_t ctx, + const char **entry, + const char **key, + const char **nspace); + +/** + * Close the object listing handle. + * + * This should be called when the handle is no longer needed. + * The handle should not be used after it has been closed. + * + * @param ctx the handle to close + */ +CEPH_RADOS_API void rados_nobjects_list_close(rados_list_ctx_t ctx); + +/** + * Get cursor handle pointing to the *beginning* of a pool. + * + * This is an opaque handle pointing to the start of a pool. It must + * be released with rados_object_list_cursor_free(). + * + * @param io ioctx for the pool + * @returns handle for the pool, NULL on error (pool does not exist) + */ +CEPH_RADOS_API rados_object_list_cursor rados_object_list_begin( + rados_ioctx_t io); + +/** + * Get cursor handle pointing to the *end* of a pool. + * + * This is an opaque handle pointing to the start of a pool. It must + * be released with rados_object_list_cursor_free(). + * + * @param io ioctx for the pool + * @returns handle for the pool, NULL on error (pool does not exist) + */ +CEPH_RADOS_API rados_object_list_cursor rados_object_list_end(rados_ioctx_t io); + +/** + * Check if a cursor has reached the end of a pool + * + * @param io ioctx + * @param cur cursor + * @returns 1 if the cursor has reached the end of the pool, 0 otherwise + */ +CEPH_RADOS_API int rados_object_list_is_end(rados_ioctx_t io, + rados_object_list_cursor cur); + +/** + * Release a cursor + * + * Release a cursor. The handle may not be used after this point. + * + * @param io ioctx + * @param cur cursor + */ +CEPH_RADOS_API void rados_object_list_cursor_free(rados_ioctx_t io, + rados_object_list_cursor cur); + +/** + * Compare two cursor positions + * + * Compare two cursors, and indicate whether the first cursor precedes, + * matches, or follows the second. + * + * @param io ioctx + * @param lhs first cursor + * @param rhs second cursor + * @returns -1, 0, or 1 for lhs < rhs, lhs == rhs, or lhs > rhs + */ +CEPH_RADOS_API int rados_object_list_cursor_cmp(rados_ioctx_t io, + rados_object_list_cursor lhs, rados_object_list_cursor rhs); + +/** + * @return the number of items set in the result array + */ +CEPH_RADOS_API int rados_object_list(rados_ioctx_t io, + const rados_object_list_cursor start, + const rados_object_list_cursor finish, + const size_t result_size, + const char *filter_buf, + const size_t filter_buf_len, + rados_object_list_item *results, + rados_object_list_cursor *next); + +CEPH_RADOS_API void rados_object_list_free( + const size_t result_size, + rados_object_list_item *results); + +/** + * Obtain cursors delineating a subset of a range. Use this + * when you want to split up the work of iterating over the + * global namespace. Expected use case is when you are iterating + * in parallel, with `m` workers, and each worker taking an id `n`. + * + * @param start start of the range to be sliced up (inclusive) + * @param finish end of the range to be sliced up (exclusive) + * @param m how many chunks to divide start-finish into + * @param n which of the m chunks you would like to get cursors for + * @param split_start cursor populated with start of the subrange (inclusive) + * @param split_finish cursor populated with end of the subrange (exclusive) + */ +CEPH_RADOS_API void rados_object_list_slice(rados_ioctx_t io, + const rados_object_list_cursor start, + const rados_object_list_cursor finish, + const size_t n, + const size_t m, + rados_object_list_cursor *split_start, + rados_object_list_cursor *split_finish); + + +/** @} Listing Objects */ + +/** + * @name Snapshots + * + * RADOS snapshots are based upon sequence numbers that form a + * snapshot context. They are pool-specific. The snapshot context + * consists of the current snapshot sequence number for a pool, and an + * array of sequence numbers at which snapshots were taken, in + * descending order. Whenever a snapshot is created or deleted, the + * snapshot sequence number for the pool is increased. To add a new + * snapshot, the new snapshot sequence number must be increased and + * added to the snapshot context. + * + * There are two ways to manage these snapshot contexts: + * -# within the RADOS cluster + * These are called pool snapshots, and store the snapshot context + * in the OSDMap. These represent a snapshot of all the objects in + * a pool. + * -# within the RADOS clients + * These are called self-managed snapshots, and push the + * responsibility for keeping track of the snapshot context to the + * clients. For every write, the client must send the snapshot + * context. In librados, this is accomplished with + * rados_selfmanaged_snap_set_write_ctx(). These are more + * difficult to manage, but are restricted to specific objects + * instead of applying to an entire pool. + * + * @{ + */ + +/** + * Create a pool-wide snapshot + * + * @param io the pool to snapshot + * @param snapname the name of the snapshot + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_snap_create(rados_ioctx_t io, + const char *snapname); + +/** + * Delete a pool snapshot + * + * @param io the pool to delete the snapshot from + * @param snapname which snapshot to delete + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_snap_remove(rados_ioctx_t io, + const char *snapname); + +/** + * Rollback an object to a pool snapshot + * + * The contents of the object will be the same as + * when the snapshot was taken. + * + * @param io the pool in which the object is stored + * @param oid the name of the object to rollback + * @param snapname which snapshot to rollback to + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_snap_rollback(rados_ioctx_t io, const char *oid, + const char *snapname); + +/** + * @warning Deprecated: Use rados_ioctx_snap_rollback() instead + */ +CEPH_RADOS_API int rados_rollback(rados_ioctx_t io, const char *oid, + const char *snapname) + __attribute__((deprecated)); + +/** + * Set the snapshot from which reads are performed. + * + * Subsequent reads will return data as it was at the time of that + * snapshot. + * + * @param io the io context to change + * @param snap the id of the snapshot to set, or LIBRADOS_SNAP_HEAD for no + * snapshot (i.e. normal operation) + */ +CEPH_RADOS_API void rados_ioctx_snap_set_read(rados_ioctx_t io, + rados_snap_t snap); + +/** + * Allocate an ID for a self-managed snapshot + * + * Get a unique ID to put in the snaphot context to create a + * snapshot. A clone of an object is not created until a write with + * the new snapshot context is completed. + * + * @param io the pool in which the snapshot will exist + * @param snapid where to store the newly allocated snapshot ID + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io, + rados_snap_t *snapid); +CEPH_RADOS_API void +rados_aio_ioctx_selfmanaged_snap_create(rados_ioctx_t io, + rados_snap_t *snapid, + rados_completion_t completion); + +/** + * Remove a self-managed snapshot + * + * This increases the snapshot sequence number, which will cause + * snapshots to be removed lazily. + * + * @param io the pool in which the snapshot will exist + * @param snapid where to store the newly allocated snapshot ID + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, + rados_snap_t snapid); +CEPH_RADOS_API void +rados_aio_ioctx_selfmanaged_snap_remove(rados_ioctx_t io, + rados_snap_t snapid, + rados_completion_t completion); + +/** + * Rollback an object to a self-managed snapshot + * + * The contents of the object will be the same as + * when the snapshot was taken. + * + * @param io the pool in which the object is stored + * @param oid the name of the object to rollback + * @param snapid which snapshot to rollback to + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io, + const char *oid, + rados_snap_t snapid); + +/** + * Set the snapshot context for use when writing to objects + * + * This is stored in the io context, and applies to all future writes. + * + * @param io the io context to change + * @param seq the newest snapshot sequence number for the pool + * @param snaps array of snapshots in sorted by descending id + * @param num_snaps how many snaphosts are in the snaps array + * @returns 0 on success, negative error code on failure + * @returns -EINVAL if snaps are not in descending order + */ +CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io, + rados_snap_t seq, + rados_snap_t *snaps, + int num_snaps); + +/** + * List all the ids of pool snapshots + * + * If the output array does not have enough space to fit all the + * snapshots, -ERANGE is returned and the caller should retry with a + * larger array. + * + * @param io the pool to read from + * @param snaps where to store the results + * @param maxlen the number of rados_snap_t that fit in the snaps array + * @returns number of snapshots on success, negative error code on failure + * @returns -ERANGE is returned if the snaps array is too short + */ +CEPH_RADOS_API int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t *snaps, + int maxlen); + +/** + * Get the id of a pool snapshot + * + * @param io the pool to read from + * @param name the snapshot to find + * @param id where to store the result + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_snap_lookup(rados_ioctx_t io, const char *name, + rados_snap_t *id); + +/** + * Get the name of a pool snapshot + * + * @param io the pool to read from + * @param id the snapshot to find + * @param name where to store the result + * @param maxlen the size of the name array + * @returns 0 on success, negative error code on failure + * @returns -ERANGE if the name array is too small + */ +CEPH_RADOS_API int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id, + char *name, int maxlen); + +/** + * Find when a pool snapshot occurred + * + * @param io the pool the snapshot was taken in + * @param id the snapshot to lookup + * @param t where to store the result + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id, + time_t *t); + +/** @} Snapshots */ + +/** + * @name Synchronous I/O + * Writes are replicated to a number of OSDs based on the + * configuration of the pool they are in. These write functions block + * until data is in memory on all replicas of the object they're + * writing to - they are equivalent to doing the corresponding + * asynchronous write, and the calling + * rados_ioctx_wait_for_complete(). For greater data safety, use the + * asynchronous functions and rados_aio_wait_for_safe(). + * + * @{ + */ + +/** + * Return the version of the last object read or written to. + * + * This exposes the internal version number of the last object read or + * written via this io context + * + * @param io the io context to check + * @returns last read or written object version + */ +CEPH_RADOS_API uint64_t rados_get_last_version(rados_ioctx_t io); + +/** + * Write *len* bytes from *buf* into the *oid* object, starting at + * offset *off*. The value of *len* must be <= UINT_MAX/2. + * + * @note This will never return a positive value not equal to len. + * @param io the io context in which the write will occur + * @param oid name of the object + * @param buf data to write + * @param len length of the data, in bytes + * @param off byte offset in the object to begin writing at + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid, + const char *buf, size_t len, uint64_t off); + +/** + * Write *len* bytes from *buf* into the *oid* object. The value of + * *len* must be <= UINT_MAX/2. + * + * The object is filled with the provided data. If the object exists, + * it is atomically truncated and then written. + * + * @param io the io context in which the write will occur + * @param oid name of the object + * @param buf data to write + * @param len length of the data, in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_write_full(rados_ioctx_t io, const char *oid, + const char *buf, size_t len); + +/** + * Write the same *data_len* bytes from *buf* multiple times into the + * *oid* object. *write_len* bytes are written in total, which must be + * a multiple of *data_len*. The value of *write_len* and *data_len* + * must be <= UINT_MAX/2. + * + * @param io the io context in which the write will occur + * @param oid name of the object + * @param buf data to write + * @param data_len length of the data, in bytes + * @param write_len the total number of bytes to write + * @param off byte offset in the object to begin writing at + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_writesame(rados_ioctx_t io, const char *oid, + const char *buf, size_t data_len, + size_t write_len, uint64_t off); + +/** + * Append *len* bytes from *buf* into the *oid* object. The value of + * *len* must be <= UINT_MAX/2. + * + * @param io the context to operate in + * @param oid the name of the object + * @param buf the data to append + * @param len length of buf (in bytes) + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_append(rados_ioctx_t io, const char *oid, + const char *buf, size_t len); + +/** + * Read data from an object + * + * The io context determines the snapshot to read from, if any was set + * by rados_ioctx_snap_set_read(). + * + * @param io the context in which to perform the read + * @param oid the name of the object to read from + * @param buf where to store the results + * @param len the number of bytes to read + * @param off the offset to start reading from in the object + * @returns number of bytes read on success, negative error code on + * failure + */ +CEPH_RADOS_API int rados_read(rados_ioctx_t io, const char *oid, char *buf, + size_t len, uint64_t off); + +/** + * Compute checksum from object data + * + * The io context determines the snapshot to checksum, if any was set + * by rados_ioctx_snap_set_read(). The length of the init_value and + * resulting checksum are dependent upon the checksum type: + * + * XXHASH64: le64 + * XXHASH32: le32 + * CRC32C: le32 + * + * The checksum result is encoded the following manner: + * + * le32 num_checksum_chunks + * { + * leXX checksum for chunk (where XX = appropriate size for the checksum type) + * } * num_checksum_chunks + * + * @param io the context in which to perform the checksum + * @param oid the name of the object to checksum + * @param type the checksum algorithm to utilize + * @param init_value the init value for the algorithm + * @param init_value_len the length of the init value + * @param len the number of bytes to checksum + * @param off the offset to start checksuming in the object + * @param chunk_size optional length-aligned chunk size for checksums + * @param pchecksum where to store the checksum result + * @param checksum_len the number of bytes available for the result + * @return negative error code on failure + */ +CEPH_RADOS_API int rados_checksum(rados_ioctx_t io, const char *oid, + rados_checksum_type_t type, + const char *init_value, size_t init_value_len, + size_t len, uint64_t off, size_t chunk_size, + char *pchecksum, size_t checksum_len); + +/** + * Delete an object + * + * @note This does not delete any snapshots of the object. + * + * @param io the pool to delete the object from + * @param oid the name of the object to delete + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_remove(rados_ioctx_t io, const char *oid); + +/** + * Resize an object + * + * If this enlarges the object, the new area is logically filled with + * zeroes. If this shrinks the object, the excess data is removed. + * + * @param io the context in which to truncate + * @param oid the name of the object + * @param size the new size of the object in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_trunc(rados_ioctx_t io, const char *oid, + uint64_t size); + +/** + * Compare an on-disk object range with a buffer + * + * @param io the context in which to perform the comparison + * @param o name of the object + * @param cmp_buf buffer containing bytes to be compared with object contents + * @param cmp_len length to compare and size of @cmp_buf in bytes + * @param off object byte offset at which to start the comparison + * @returns 0 on success, negative error code on failure, + * (-MAX_ERRNO - mismatch_off) on mismatch + */ +CEPH_RADOS_API int rados_cmpext(rados_ioctx_t io, const char *o, + const char *cmp_buf, size_t cmp_len, + uint64_t off); + +/** + * @name Xattrs + * Extended attributes are stored as extended attributes on the files + * representing an object on the OSDs. Thus, they have the same + * limitations as the underlying filesystem. On ext4, this means that + * the total data stored in xattrs cannot exceed 4KB. + * + * @{ + */ + +/** + * Get the value of an extended attribute on an object. + * + * @param io the context in which the attribute is read + * @param o name of the object + * @param name which extended attribute to read + * @param buf where to store the result + * @param len size of buf in bytes + * @returns length of xattr value on success, negative error code on failure + */ +CEPH_RADOS_API int rados_getxattr(rados_ioctx_t io, const char *o, + const char *name, char *buf, size_t len); + +/** + * Set an extended attribute on an object. + * + * @param io the context in which xattr is set + * @param o name of the object + * @param name which extended attribute to set + * @param buf what to store in the xattr + * @param len the number of bytes in buf + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_setxattr(rados_ioctx_t io, const char *o, + const char *name, const char *buf, + size_t len); + +/** + * Delete an extended attribute from an object. + * + * @param io the context in which to delete the xattr + * @param o the name of the object + * @param name which xattr to delete + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_rmxattr(rados_ioctx_t io, const char *o, + const char *name); + +/** + * Start iterating over xattrs on an object. + * + * @post iter is a valid iterator + * + * @param io the context in which to list xattrs + * @param oid name of the object + * @param iter where to store the iterator + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_getxattrs(rados_ioctx_t io, const char *oid, + rados_xattrs_iter_t *iter); + +/** + * Get the next xattr on the object + * + * @pre iter is a valid iterator + * + * @post name is the NULL-terminated name of the next xattr, and val + * contains the value of the xattr, which is of length len. If the end + * of the list has been reached, name and val are NULL, and len is 0. + * + * @param iter iterator to advance + * @param name where to store the name of the next xattr + * @param val where to store the value of the next xattr + * @param len the number of bytes in val + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_getxattrs_next(rados_xattrs_iter_t iter, + const char **name, const char **val, + size_t *len); + +/** + * Close the xattr iterator. + * + * iter should not be used after this is called. + * + * @param iter the iterator to close + */ +CEPH_RADOS_API void rados_getxattrs_end(rados_xattrs_iter_t iter); + +/** @} Xattrs */ + +/** + * Get the next omap key/value pair on the object + * + * @pre iter is a valid iterator + * + * @post key and val are the next key/value pair. key is + * null-terminated, and val has length len. If the end of the list has + * been reached, key and val are NULL, and len is 0. key and val will + * not be accessible after rados_omap_get_end() is called on iter, so + * if they are needed after that they should be copied. + * + * @param iter iterator to advance + * @param key where to store the key of the next omap entry + * @param val where to store the value of the next omap entry + * @param len where to store the number of bytes in val + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_omap_get_next(rados_omap_iter_t iter, + char **key, + char **val, + size_t *len); + +/** + * Close the omap iterator. + * + * iter should not be used after this is called. + * + * @param iter the iterator to close + */ +CEPH_RADOS_API void rados_omap_get_end(rados_omap_iter_t iter); + +/** + * Get object stats (size/mtime) + * + * TODO: when are these set, and by whom? can they be out of date? + * + * @param io ioctx + * @param o object name + * @param psize where to store object size + * @param pmtime where to store modification time + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize, + time_t *pmtime); + +/** + * Update tmap (trivial map) + * + * Do compound update to a tmap object, inserting or deleting some + * number of records. cmdbuf is a series of operation byte + * codes, following by command payload. Each command is a single-byte + * command code, whose value is one of CEPH_OSD_TMAP_*. + * + * - update tmap 'header' + * - 1 byte = CEPH_OSD_TMAP_HDR + * - 4 bytes = data length (little endian) + * - N bytes = data + * + * - insert/update one key/value pair + * - 1 byte = CEPH_OSD_TMAP_SET + * - 4 bytes = key name length (little endian) + * - N bytes = key name + * - 4 bytes = data length (little endian) + * - M bytes = data + * + * - insert one key/value pair; return -EEXIST if it already exists. + * - 1 byte = CEPH_OSD_TMAP_CREATE + * - 4 bytes = key name length (little endian) + * - N bytes = key name + * - 4 bytes = data length (little endian) + * - M bytes = data + * + * - remove one key/value pair + * - 1 byte = CEPH_OSD_TMAP_RM + * - 4 bytes = key name length (little endian) + * - N bytes = key name + * + * Restrictions: + * - The HDR update must preceed any key/value updates. + * - All key/value updates must be in lexicographically sorted order + * in cmdbuf. + * - You can read/write to a tmap object via the regular APIs, but + * you should be careful not to corrupt it. Also be aware that the + * object format may change without notice. + * + * @param io ioctx + * @param o object name + * @param cmdbuf command buffer + * @param cmdbuflen command buffer length in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_tmap_update(rados_ioctx_t io, const char *o, + const char *cmdbuf, size_t cmdbuflen); + +/** + * Store complete tmap (trivial map) object + * + * Put a full tmap object into the store, replacing what was there. + * + * The format of buf is: + * - 4 bytes - length of header (little endian) + * - N bytes - header data + * - 4 bytes - number of keys (little endian) + * + * and for each key, + * - 4 bytes - key name length (little endian) + * - N bytes - key name + * - 4 bytes - value length (little endian) + * - M bytes - value data + * + * @param io ioctx + * @param o object name + * @param buf buffer + * @param buflen buffer length in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_tmap_put(rados_ioctx_t io, const char *o, + const char *buf, size_t buflen); + +/** + * Fetch complete tmap (trivial map) object + * + * Read a full tmap object. See rados_tmap_put() for the format the + * data is returned in. + * + * @param io ioctx + * @param o object name + * @param buf buffer + * @param buflen buffer length in bytes + * @returns 0 on success, negative error code on failure + * @returns -ERANGE if buf isn't big enough + */ +CEPH_RADOS_API int rados_tmap_get(rados_ioctx_t io, const char *o, char *buf, + size_t buflen); + +/** + * Execute an OSD class method on an object + * + * The OSD has a plugin mechanism for performing complicated + * operations on an object atomically. These plugins are called + * classes. This function allows librados users to call the custom + * methods. The input and output formats are defined by the class. + * Classes in ceph.git can be found in src/cls subdirectories + * + * @param io the context in which to call the method + * @param oid the object to call the method on + * @param cls the name of the class + * @param method the name of the method + * @param in_buf where to find input + * @param in_len length of in_buf in bytes + * @param buf where to store output + * @param out_len length of buf in bytes + * @returns the length of the output, or + * -ERANGE if out_buf does not have enough space to store it (For methods that return data). For + * methods that don't return data, the return value is + * method-specific. + */ +CEPH_RADOS_API int rados_exec(rados_ioctx_t io, const char *oid, + const char *cls, const char *method, + const char *in_buf, size_t in_len, char *buf, + size_t out_len); + + +/** @} Synchronous I/O */ + +/** + * @name Asynchronous I/O + * Read and write to objects without blocking. + * + * @{ + */ + +/** + * @typedef rados_callback_t + * Callbacks for asynchrous operations take two parameters: + * - cb the completion that has finished + * - arg application defined data made available to the callback function + */ +typedef void (*rados_callback_t)(rados_completion_t cb, void *arg); + +/** + * Constructs a completion to use with asynchronous operations + * + * The complete and safe callbacks correspond to operations being + * acked and committed, respectively. The callbacks are called in + * order of receipt, so the safe callback may be triggered before the + * complete callback, and vice versa. This is affected by journalling + * on the OSDs. + * + * TODO: more complete documentation of this elsewhere (in the RADOS docs?) + * + * @note Read operations only get a complete callback. + * @note BUG: this should check for ENOMEM instead of throwing an exception + * + * @param cb_arg application-defined data passed to the callback functions + * @param cb_complete the function to be called when the operation is + * in memory on all relpicas + * @param cb_safe the function to be called when the operation is on + * stable storage on all replicas + * @param pc where to store the completion + * @returns 0 + */ +CEPH_RADOS_API int rados_aio_create_completion(void *cb_arg, + rados_callback_t cb_complete, + rados_callback_t cb_safe, + rados_completion_t *pc); + +/** + * Block until an operation completes + * + * This means it is in memory on all replicas. + * + * @note BUG: this should be void + * + * @param c operation to wait for + * @returns 0 + */ +CEPH_RADOS_API int rados_aio_wait_for_complete(rados_completion_t c); + +/** + * Block until an operation is safe + * + * This means it is on stable storage on all replicas. + * + * @note BUG: this should be void + * + * @param c operation to wait for + * @returns 0 + */ +CEPH_RADOS_API int rados_aio_wait_for_safe(rados_completion_t c); + +/** + * Has an asynchronous operation completed? + * + * @warning This does not imply that the complete callback has + * finished + * + * @param c async operation to inspect + * @returns whether c is complete + */ +CEPH_RADOS_API int rados_aio_is_complete(rados_completion_t c); + +/** + * Is an asynchronous operation safe? + * + * @warning This does not imply that the safe callback has + * finished + * + * @param c async operation to inspect + * @returns whether c is safe + */ +CEPH_RADOS_API int rados_aio_is_safe(rados_completion_t c); + +/** + * Block until an operation completes and callback completes + * + * This means it is in memory on all replicas and can be read. + * + * @note BUG: this should be void + * + * @param c operation to wait for + * @returns 0 + */ +CEPH_RADOS_API int rados_aio_wait_for_complete_and_cb(rados_completion_t c); + +/** + * Block until an operation is safe and callback has completed + * + * This means it is on stable storage on all replicas. + * + * @note BUG: this should be void + * + * @param c operation to wait for + * @returns 0 + */ +CEPH_RADOS_API int rados_aio_wait_for_safe_and_cb(rados_completion_t c); + +/** + * Has an asynchronous operation and callback completed + * + * @param c async operation to inspect + * @returns whether c is complete + */ +CEPH_RADOS_API int rados_aio_is_complete_and_cb(rados_completion_t c); + +/** + * Is an asynchronous operation safe and has the callback completed + * + * @param c async operation to inspect + * @returns whether c is safe + */ +CEPH_RADOS_API int rados_aio_is_safe_and_cb(rados_completion_t c); + +/** + * Get the return value of an asychronous operation + * + * The return value is set when the operation is complete or safe, + * whichever comes first. + * + * @pre The operation is safe or complete + * + * @note BUG: complete callback may never be called when the safe + * message is received before the complete message + * + * @param c async operation to inspect + * @returns return value of the operation + */ +CEPH_RADOS_API int rados_aio_get_return_value(rados_completion_t c); + +/** + * Get the internal object version of the target of an asychronous operation + * + * The return value is set when the operation is complete or safe, + * whichever comes first. + * + * @pre The operation is safe or complete + * + * @note BUG: complete callback may never be called when the safe + * message is received before the complete message + * + * @param c async operation to inspect + * @returns version number of the asychronous operation's target + */ +CEPH_RADOS_API uint64_t rados_aio_get_version(rados_completion_t c); + +/** + * Release a completion + * + * Call this when you no longer need the completion. It may not be + * freed immediately if the operation is not acked and committed. + * + * @param c completion to release + */ +CEPH_RADOS_API void rados_aio_release(rados_completion_t c); + +/** + * Write data to an object asynchronously + * + * Queues the write and returns. The return value of the completion + * will be 0 on success, negative error code on failure. + * + * @param io the context in which the write will occur + * @param oid name of the object + * @param completion what to do when the write is safe and complete + * @param buf data to write + * @param len length of the data, in bytes + * @param off byte offset in the object to begin writing at + * @returns 0 on success, -EROFS if the io context specifies a snap_seq + * other than LIBRADOS_SNAP_HEAD + */ +CEPH_RADOS_API int rados_aio_write(rados_ioctx_t io, const char *oid, + rados_completion_t completion, + const char *buf, size_t len, uint64_t off); + +/** + * Asychronously append data to an object + * + * Queues the append and returns. + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param io the context to operate in + * @param oid the name of the object + * @param completion what to do when the append is safe and complete + * @param buf the data to append + * @param len length of buf (in bytes) + * @returns 0 on success, -EROFS if the io context specifies a snap_seq + * other than LIBRADOS_SNAP_HEAD + */ +CEPH_RADOS_API int rados_aio_append(rados_ioctx_t io, const char *oid, + rados_completion_t completion, + const char *buf, size_t len); + +/** + * Asychronously write an entire object + * + * The object is filled with the provided data. If the object exists, + * it is atomically truncated and then written. + * Queues the write_full and returns. + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param io the io context in which the write will occur + * @param oid name of the object + * @param completion what to do when the write_full is safe and complete + * @param buf data to write + * @param len length of the data, in bytes + * @returns 0 on success, -EROFS if the io context specifies a snap_seq + * other than LIBRADOS_SNAP_HEAD + */ +CEPH_RADOS_API int rados_aio_write_full(rados_ioctx_t io, const char *oid, + rados_completion_t completion, + const char *buf, size_t len); + +/** + * Asychronously write the same buffer multiple times + * + * Queues the writesame and returns. + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param io the io context in which the write will occur + * @param oid name of the object + * @param completion what to do when the writesame is safe and complete + * @param buf data to write + * @param data_len length of the data, in bytes + * @param write_len the total number of bytes to write + * @param off byte offset in the object to begin writing at + * @returns 0 on success, -EROFS if the io context specifies a snap_seq + * other than LIBRADOS_SNAP_HEAD + */ +CEPH_RADOS_API int rados_aio_writesame(rados_ioctx_t io, const char *oid, + rados_completion_t completion, + const char *buf, size_t data_len, + size_t write_len, uint64_t off); + +/** + * Asychronously remove an object + * + * Queues the remove and returns. + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param io the context to operate in + * @param oid the name of the object + * @param completion what to do when the remove is safe and complete + * @returns 0 on success, -EROFS if the io context specifies a snap_seq + * other than LIBRADOS_SNAP_HEAD + */ +CEPH_RADOS_API int rados_aio_remove(rados_ioctx_t io, const char *oid, + rados_completion_t completion); + +/** + * Asychronously read data from an object + * + * The io context determines the snapshot to read from, if any was set + * by rados_ioctx_snap_set_read(). + * + * The return value of the completion will be number of bytes read on + * success, negative error code on failure. + * + * @note only the 'complete' callback of the completion will be called. + * + * @param io the context in which to perform the read + * @param oid the name of the object to read from + * @param completion what to do when the read is complete + * @param buf where to store the results + * @param len the number of bytes to read + * @param off the offset to start reading from in the object + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_read(rados_ioctx_t io, const char *oid, + rados_completion_t completion, + char *buf, size_t len, uint64_t off); + +/** + * Block until all pending writes in an io context are safe + * + * This is not equivalent to calling rados_aio_wait_for_safe() on all + * write completions, since this waits for the associated callbacks to + * complete as well. + * + * @note BUG: always returns 0, should be void or accept a timeout + * + * @param io the context to flush + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_flush(rados_ioctx_t io); + + +/** + * Schedule a callback for when all currently pending + * aio writes are safe. This is a non-blocking version of + * rados_aio_flush(). + * + * @param io the context to flush + * @param completion what to do when the writes are safe + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_flush_async(rados_ioctx_t io, + rados_completion_t completion); + + +/** + * Asynchronously get object stats (size/mtime) + * + * @param io ioctx + * @param o object name + * @param psize where to store object size + * @param pmtime where to store modification time + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_stat(rados_ioctx_t io, const char *o, + rados_completion_t completion, + uint64_t *psize, time_t *pmtime); + +/** + * Asynchronously compare an on-disk object range with a buffer + * + * @param io the context in which to perform the comparison + * @param o the name of the object to compare with + * @param completion what to do when the comparison is complete + * @param cmp_buf buffer containing bytes to be compared with object contents + * @param cmp_len length to compare and size of @cmp_buf in bytes + * @param off object byte offset at which to start the comparison + * @returns 0 on success, negative error code on failure, + * (-MAX_ERRNO - mismatch_off) on mismatch + */ +CEPH_RADOS_API int rados_aio_cmpext(rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *cmp_buf, + size_t cmp_len, + uint64_t off); + +/** + * Cancel async operation + * + * @param io ioctx + * @param completion completion handle + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_cancel(rados_ioctx_t io, + rados_completion_t completion); + +/** + * Asynchronously execute an OSD class method on an object + * + * The OSD has a plugin mechanism for performing complicated + * operations on an object atomically. These plugins are called + * classes. This function allows librados users to call the custom + * methods. The input and output formats are defined by the class. + * Classes in ceph.git can be found in src/cls subdirectories + * + * @param io the context in which to call the method + * @param oid the object to call the method on + * @param cls the name of the class + * @param method the name of the method + * @param in_buf where to find input + * @param in_len length of in_buf in bytes + * @param buf where to store output + * @param out_len length of buf in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_exec(rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *cls, const char *method, + const char *in_buf, size_t in_len, + char *buf, size_t out_len); + +/** @} Asynchronous I/O */ + +/** + * @name Asynchronous Xattrs + * Extended attributes are stored as extended attributes on the files + * representing an object on the OSDs. Thus, they have the same + * limitations as the underlying filesystem. On ext4, this means that + * the total data stored in xattrs cannot exceed 4KB. + * + * @{ + */ + +/** + * Asynchronously get the value of an extended attribute on an object. + * + * @param io the context in which the attribute is read + * @param o name of the object + * @param completion what to do when the getxattr completes + * @param name which extended attribute to read + * @param buf where to store the result + * @param len size of buf in bytes + * @returns length of xattr value on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_getxattr(rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *name, char *buf, size_t len); + +/** + * Asynchronously set an extended attribute on an object. + * + * @param io the context in which xattr is set + * @param o name of the object + * @param completion what to do when the setxattr completes + * @param name which extended attribute to set + * @param buf what to store in the xattr + * @param len the number of bytes in buf + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_setxattr(rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *name, const char *buf, + size_t len); + +/** + * Asynchronously delete an extended attribute from an object. + * + * @param io the context in which to delete the xattr + * @param o the name of the object + * @param completion what to do when the rmxattr completes + * @param name which xattr to delete + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_rmxattr(rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *name); + +/** + * Asynchronously start iterating over xattrs on an object. + * + * @post iter is a valid iterator + * + * @param io the context in which to list xattrs + * @param oid name of the object + * @param iter where to store the iterator + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_getxattrs(rados_ioctx_t io, const char *oid, + rados_completion_t completion, + rados_xattrs_iter_t *iter); + +/** @} Asynchronous Xattrs */ + +/** + * @name Watch/Notify + * + * Watch/notify is a protocol to help communicate among clients. It + * can be used to sychronize client state. All that's needed is a + * well-known object name (for example, rbd uses the header object of + * an image). + * + * Watchers register an interest in an object, and receive all + * notifies on that object. A notify attempts to communicate with all + * clients watching an object, and blocks on the notifier until each + * client responds or a timeout is reached. + * + * See rados_watch() and rados_notify() for more details. + * + * @{ + */ + +/** + * @typedef rados_watchcb_t + * + * Callback activated when a notify is received on a watched + * object. + * + * @param opcode undefined + * @param ver version of the watched object + * @param arg application-specific data + * + * @note BUG: opcode is an internal detail that shouldn't be exposed + * @note BUG: ver is unused + */ +typedef void (*rados_watchcb_t)(uint8_t opcode, uint64_t ver, void *arg); + +/** + * @typedef rados_watchcb2_t + * + * Callback activated when a notify is received on a watched + * object. + * + * @param arg opaque user-defined value provided to rados_watch2() + * @param notify_id an id for this notify event + * @param handle the watcher handle we are notifying + * @param notifier_id the unique client id for the notifier + * @param data payload from the notifier + * @param datalen length of payload buffer + */ +typedef void (*rados_watchcb2_t)(void *arg, + uint64_t notify_id, + uint64_t handle, + uint64_t notifier_id, + void *data, + size_t data_len); + +/** + * @typedef rados_watcherrcb_t + * + * Callback activated when we encounter an error with the watch session. + * This can happen when the location of the objects moves within the + * cluster and we fail to register our watch with the new object location, + * or when our connection with the object OSD is otherwise interrupted and + * we may have missed notify events. + * + * @param pre opaque user-defined value provided to rados_watch2() + * @param err error code + */ + typedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err); + +/** + * Register an interest in an object + * + * A watch operation registers the client as being interested in + * notifications on an object. OSDs keep track of watches on + * persistent storage, so they are preserved across cluster changes by + * the normal recovery process. If the client loses its connection to + * the primary OSD for a watched object, the watch will be removed + * after 30 seconds. Watches are automatically reestablished when a new + * connection is made, or a placement group switches OSDs. + * + * @note BUG: librados should provide a way for watchers to notice connection resets + * @note BUG: the ver parameter does not work, and -ERANGE will never be returned + * (See URL tracker.ceph.com/issues/2592) + * + * @param io the pool the object is in + * @param o the object to watch + * @param ver expected version of the object + * @param cookie where to store the internal id assigned to this watch + * @param watchcb what to do when a notify is received on this object + * @param arg application defined data to pass when watchcb is called + * @returns 0 on success, negative error code on failure + * @returns -ERANGE if the version of the object is greater than ver + */ +CEPH_RADOS_API int rados_watch(rados_ioctx_t io, const char *o, uint64_t ver, + uint64_t *cookie, + rados_watchcb_t watchcb, void *arg) + __attribute__((deprecated)); + + +/** + * Register an interest in an object + * + * A watch operation registers the client as being interested in + * notifications on an object. OSDs keep track of watches on + * persistent storage, so they are preserved across cluster changes by + * the normal recovery process. If the client loses its connection to the + * primary OSD for a watched object, the watch will be removed after + * a timeout configured with osd_client_watch_timeout. + * Watches are automatically reestablished when a new + * connection is made, or a placement group switches OSDs. + * + * @param io the pool the object is in + * @param o the object to watch + * @param cookie where to store the internal id assigned to this watch + * @param watchcb what to do when a notify is received on this object + * @param watcherrcb what to do when the watch session encounters an error + * @param arg opaque value to pass to the callback + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_watch2(rados_ioctx_t io, const char *o, uint64_t *cookie, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + void *arg); + +/** + * Register an interest in an object + * + * A watch operation registers the client as being interested in + * notifications on an object. OSDs keep track of watches on + * persistent storage, so they are preserved across cluster changes by + * the normal recovery process. Watches are automatically reestablished when a new + * connection is made, or a placement group switches OSDs. + * + * @param io the pool the object is in + * @param o the object to watch + * @param cookie where to store the internal id assigned to this watch + * @param watchcb what to do when a notify is received on this object + * @param watcherrcb what to do when the watch session encounters an error + * @param timeout how many seconds the connection will keep after disconnection + * @param arg opaque value to pass to the callback + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + uint32_t timeout, + void *arg); + +/** + * Asynchronous register an interest in an object + * + * A watch operation registers the client as being interested in + * notifications on an object. OSDs keep track of watches on + * persistent storage, so they are preserved across cluster changes by + * the normal recovery process. If the client loses its connection to + * the primary OSD for a watched object, the watch will be removed + * after 30 seconds. Watches are automatically reestablished when a new + * connection is made, or a placement group switches OSDs. + * + * @param io the pool the object is in + * @param o the object to watch + * @param completion what to do when operation has been attempted + * @param handle where to store the internal id assigned to this watch + * @param watchcb what to do when a notify is received on this object + * @param watcherrcb what to do when the watch session encounters an error + * @param arg opaque value to pass to the callback + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_watch(rados_ioctx_t io, const char *o, + rados_completion_t completion, uint64_t *handle, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + void *arg); + +/** + * Asynchronous register an interest in an object + * + * A watch operation registers the client as being interested in + * notifications on an object. OSDs keep track of watches on + * persistent storage, so they are preserved across cluster changes by + * the normal recovery process. If the client loses its connection to + * the primary OSD for a watched object, the watch will be removed + * after the number of seconds that configured in timeout parameter. + * Watches are automatically reestablished when a new + * connection is made, or a placement group switches OSDs. + * + * @param io the pool the object is in + * @param o the object to watch + * @param completion what to do when operation has been attempted + * @param handle where to store the internal id assigned to this watch + * @param watchcb what to do when a notify is received on this object + * @param watcherrcb what to do when the watch session encounters an error + * @param timeout how many seconds the connection will keep after disconnection + * @param arg opaque value to pass to the callback + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_watch2(rados_ioctx_t io, const char *o, + rados_completion_t completion, uint64_t *handle, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + uint32_t timeout, + void *arg); + +/** + * Check on the status of a watch + * + * Return the number of milliseconds since the watch was last confirmed. + * Or, if there has been an error, return that. + * + * If there is an error, the watch is no longer valid, and should be + * destroyed with rados_unwatch2(). The the user is still interested + * in the object, a new watch should be created with rados_watch2(). + * + * @param io the pool the object is in + * @param cookie the watch handle + * @returns ms since last confirmed on success, negative error code on failure + */ +CEPH_RADOS_API int rados_watch_check(rados_ioctx_t io, uint64_t cookie); + +/** + * Unregister an interest in an object + * + * Once this completes, no more notifies will be sent to us for this + * watch. This should be called to clean up unneeded watchers. + * + * @param io the pool the object is in + * @param o the name of the watched object (ignored) + * @param cookie which watch to unregister + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_unwatch(rados_ioctx_t io, const char *o, uint64_t cookie) + __attribute__((deprecated)); + +/** + * Unregister an interest in an object + * + * Once this completes, no more notifies will be sent to us for this + * watch. This should be called to clean up unneeded watchers. + * + * @param io the pool the object is in + * @param cookie which watch to unregister + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_unwatch2(rados_ioctx_t io, uint64_t cookie); + +/** + * Asynchronous unregister an interest in an object + * + * Once this completes, no more notifies will be sent to us for this + * watch. This should be called to clean up unneeded watchers. + * + * @param io the pool the object is in + * @param completion what to do when operation has been attempted + * @param cookie which watch to unregister + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_unwatch(rados_ioctx_t io, uint64_t cookie, + rados_completion_t completion); + +/** + * Sychronously notify watchers of an object + * + * This blocks until all watchers of the object have received and + * reacted to the notify, or a timeout is reached. + * + * @note BUG: the timeout is not changeable via the C API + * @note BUG: the bufferlist is inaccessible in a rados_watchcb_t + * + * @param io the pool the object is in + * @param o the name of the object + * @param ver obsolete - just pass zero + * @param buf data to send to watchers + * @param buf_len length of buf in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_notify(rados_ioctx_t io, const char *o, uint64_t ver, + const char *buf, int buf_len) + __attribute__((deprecated)); + +/** + * Sychronously notify watchers of an object + * + * This blocks until all watchers of the object have received and + * reacted to the notify, or a timeout is reached. + * + * The reply buffer is optional. If specified, the client will get + * back an encoded buffer that includes the ids of the clients that + * acknowledged the notify as well as their notify ack payloads (if + * any). Clients that timed out are not included. Even clients that + * do not include a notify ack payload are included in the list but + * have a 0-length payload associated with them. The format: + * + * le32 num_acks + * { + * le64 gid global id for the client (for client.1234 that's 1234) + * le64 cookie cookie for the client + * le32 buflen length of reply message buffer + * u8 * buflen payload + * } * num_acks + * le32 num_timeouts + * { + * le64 gid global id for the client + * le64 cookie cookie for the client + * } * num_timeouts + * + * Note: There may be multiple instances of the same gid if there are + * multiple watchers registered via the same client. + * + * Note: The buffer must be released with rados_buffer_free() when the + * user is done with it. + * + * Note: Since the result buffer includes clients that time out, it + * will be set even when rados_notify() returns an error code (like + * -ETIMEDOUT). + * + * @param io the pool the object is in + * @param completion what to do when operation has been attempted + * @param o the name of the object + * @param buf data to send to watchers + * @param buf_len length of buf in bytes + * @param timeout_ms notify timeout (in ms) + * @param reply_buffer pointer to reply buffer pointer (free with rados_buffer_free) + * @param reply_buffer_len pointer to size of reply buffer + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_notify2(rados_ioctx_t io, const char *o, + const char *buf, int buf_len, + uint64_t timeout_ms, + char **reply_buffer, size_t *reply_buffer_len); +CEPH_RADOS_API int rados_aio_notify(rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *buf, int buf_len, + uint64_t timeout_ms, char **reply_buffer, + size_t *reply_buffer_len); + +/** + * Acknolwedge receipt of a notify + * + * @param io the pool the object is in + * @param o the name of the object + * @param notify_id the notify_id we got on the watchcb2_t callback + * @param cookie the watcher handle + * @param buf payload to return to notifier (optional) + * @param buf_len payload length + * @returns 0 on success + */ +CEPH_RADOS_API int rados_notify_ack(rados_ioctx_t io, const char *o, + uint64_t notify_id, uint64_t cookie, + const char *buf, int buf_len); + +/** + * Flush watch/notify callbacks + * + * This call will block until all pending watch/notify callbacks have + * been executed and the queue is empty. It should usually be called + * after shutting down any watches before shutting down the ioctx or + * librados to ensure that any callbacks do not misuse the ioctx (for + * example by calling rados_notify_ack after the ioctx has been + * destroyed). + * + * @param cluster the cluster handle + */ +CEPH_RADOS_API int rados_watch_flush(rados_t cluster); +/** + * Flush watch/notify callbacks + * + * This call will be nonblock, and the completion will be called + * until all pending watch/notify callbacks have been executed and + * the queue is empty. It should usually be called after shutting + * down any watches before shutting down the ioctx or + * librados to ensure that any callbacks do not misuse the ioctx (for + * example by calling rados_notify_ack after the ioctx has been + * destroyed). + * + * @param cluster the cluster handle + * @param completion what to do when operation has been attempted + */ +CEPH_RADOS_API int rados_aio_watch_flush(rados_t cluster, rados_completion_t completion); + +/** @} Watch/Notify */ + +/** + * Pin an object in the cache tier + * + * When an object is pinned in the cache tier, it stays in the cache + * tier, and won't be flushed out. + * + * @param io the pool the object is in + * @param o the object id + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_cache_pin(rados_ioctx_t io, const char *o); + +/** + * Unpin an object in the cache tier + * + * After an object is unpinned in the cache tier, it can be flushed out + * + * @param io the pool the object is in + * @param o the object id + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_cache_unpin(rados_ioctx_t io, const char *o); + +/** + * @name Hints + * + * @{ + */ + +/** + * Set allocation hint for an object + * + * This is an advisory operation, it will always succeed (as if it was + * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not + * guaranteed to do anything on the backend. + * + * @param io the pool the object is in + * @param o the name of the object + * @param expected_object_size expected size of the object, in bytes + * @param expected_write_size expected size of writes to the object, in bytes + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_set_alloc_hint(rados_ioctx_t io, const char *o, + uint64_t expected_object_size, + uint64_t expected_write_size); + +/** + * Set allocation hint for an object + * + * This is an advisory operation, it will always succeed (as if it was + * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not + * guaranteed to do anything on the backend. + * + * @param io the pool the object is in + * @param o the name of the object + * @param expected_object_size expected size of the object, in bytes + * @param expected_write_size expected size of writes to the object, in bytes + * @param flags hints about future IO patterns + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_set_alloc_hint2(rados_ioctx_t io, const char *o, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags); + +/** @} Hints */ + +/** + * @name Object Operations + * + * A single rados operation can do multiple operations on one object + * atomicly. The whole operation will suceed or fail, and no partial + * results will be visible. + * + * Operations may be either reads, which can return data, or writes, + * which cannot. The effects of writes are applied and visible all at + * once, so an operation that sets an xattr and then checks its value + * will not see the updated value. + * + * @{ + */ + +/** + * Create a new rados_write_op_t write operation. This will store all actions + * to be performed atomically. You must call rados_release_write_op when you are + * finished with it. + * + * @returns non-NULL on success, NULL on memory allocation error. + */ +CEPH_RADOS_API rados_write_op_t rados_create_write_op(void); + +/** + * Free a rados_write_op_t, must be called when you're done with it. + * @param write_op operation to deallocate, created with rados_create_write_op + */ +CEPH_RADOS_API void rados_release_write_op(rados_write_op_t write_op); + +/** + * Set flags for the last operation added to this write_op. + * At least one op must have been added to the write_op. + * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG + */ +CEPH_RADOS_API void rados_write_op_set_flags(rados_write_op_t write_op, + int flags); + +/** + * Ensure that the object exists before writing + * @param write_op operation to add this action to + */ +CEPH_RADOS_API void rados_write_op_assert_exists(rados_write_op_t write_op); + +/** + * Ensure that the object exists and that its internal version + * number is equal to "ver" before writing. "ver" should be a + * version number previously obtained with rados_get_last_version(). + * - If the object's version is greater than the asserted version + * then rados_write_op_operate will return -ERANGE instead of + * executing the op. + * - If the object's version is less than the asserted version + * then rados_write_op_operate will return -EOVERFLOW instead + * of executing the op. + * @param write_op operation to add this action to + * @param ver object version number + */ +CEPH_RADOS_API void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver); + +/** + * Ensure that given object range (extent) satisfies comparison. + * + * @param write_op operation to add this action to + * @param cmp_buf buffer containing bytes to be compared with object contents + * @param cmp_len length to compare and size of @cmp_buf in bytes + * @param off object byte offset at which to start the comparison + * @param prval returned result of comparison, 0 on success, negative error code + * on failure, (-MAX_ERRNO - mismatch_off) on mismatch + */ +CEPH_RADOS_API void rados_write_op_cmpext(rados_write_op_t write_op, + const char *cmp_buf, + size_t cmp_len, + uint64_t off, + int *prval); + +/** + * Ensure that given xattr satisfies comparison. + * If the comparison is not satisfied, the return code of the + * operation will be -ECANCELED + * @param write_op operation to add this action to + * @param name name of the xattr to look up + * @param comparison_operator currently undocumented, look for + * LIBRADOS_CMPXATTR_OP_EQ in librados.h + * @param value buffer to compare actual xattr value to + * @param value_len length of buffer to compare actual xattr value to + */ +CEPH_RADOS_API void rados_write_op_cmpxattr(rados_write_op_t write_op, + const char *name, + uint8_t comparison_operator, + const char *value, + size_t value_len); + +/** + * Ensure that the an omap value satisfies a comparison, + * with the supplied value on the right hand side (i.e. + * for OP_LT, the comparison is actual_value < value. + * + * @param write_op operation to add this action to + * @param key which omap value to compare + * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ, + LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT + * @param val value to compare with + * @param val_len length of value in bytes + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_write_op_omap_cmp(rados_write_op_t write_op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t val_len, + int *prval); + +/** + * Set an xattr + * @param write_op operation to add this action to + * @param name name of the xattr + * @param value buffer to set xattr to + * @param value_len length of buffer to set xattr to + */ +CEPH_RADOS_API void rados_write_op_setxattr(rados_write_op_t write_op, + const char *name, + const char *value, + size_t value_len); + +/** + * Remove an xattr + * @param write_op operation to add this action to + * @param name name of the xattr to remove + */ +CEPH_RADOS_API void rados_write_op_rmxattr(rados_write_op_t write_op, + const char *name); + +/** + * Create the object + * @param write_op operation to add this action to + * @param exclusive set to either LIBRADOS_CREATE_EXCLUSIVE or + LIBRADOS_CREATE_IDEMPOTENT + * will error if the object already exists. + * @param category category string (DEPRECATED, HAS NO EFFECT) + */ +CEPH_RADOS_API void rados_write_op_create(rados_write_op_t write_op, + int exclusive, + const char* category); + +/** + * Write to offset + * @param write_op operation to add this action to + * @param offset offset to write to + * @param buffer bytes to write + * @param len length of buffer + */ +CEPH_RADOS_API void rados_write_op_write(rados_write_op_t write_op, + const char *buffer, + size_t len, + uint64_t offset); + +/** + * Write whole object, atomically replacing it. + * @param write_op operation to add this action to + * @param buffer bytes to write + * @param len length of buffer + */ +CEPH_RADOS_API void rados_write_op_write_full(rados_write_op_t write_op, + const char *buffer, + size_t len); + +/** + * Write the same buffer multiple times + * @param write_op operation to add this action to + * @param buffer bytes to write + * @param data_len length of buffer + * @param write_len total number of bytes to write, as a multiple of @data_len + * @param offset offset to write to + */ +CEPH_RADOS_API void rados_write_op_writesame(rados_write_op_t write_op, + const char *buffer, + size_t data_len, + size_t write_len, + uint64_t offset); + +/** + * Append to end of object. + * @param write_op operation to add this action to + * @param buffer bytes to write + * @param len length of buffer + */ +CEPH_RADOS_API void rados_write_op_append(rados_write_op_t write_op, + const char *buffer, + size_t len); +/** + * Remove object + * @param write_op operation to add this action to + */ +CEPH_RADOS_API void rados_write_op_remove(rados_write_op_t write_op); + +/** + * Truncate an object + * @param write_op operation to add this action to + * @param offset Offset to truncate to + */ +CEPH_RADOS_API void rados_write_op_truncate(rados_write_op_t write_op, + uint64_t offset); + +/** + * Zero part of an object + * @param write_op operation to add this action to + * @param offset Offset to zero + * @param len length to zero + */ +CEPH_RADOS_API void rados_write_op_zero(rados_write_op_t write_op, + uint64_t offset, + uint64_t len); + +/** + * Execute an OSD class method on an object + * See rados_exec() for general description. + * + * @param write_op operation to add this action to + * @param cls the name of the class + * @param method the name of the method + * @param in_buf where to find input + * @param in_len length of in_buf in bytes + * @param prval where to store the return value from the method + */ +CEPH_RADOS_API void rados_write_op_exec(rados_write_op_t write_op, + const char *cls, + const char *method, + const char *in_buf, + size_t in_len, + int *prval); + +/** + * Set key/value pairs on an object + * + * @param write_op operation to add this action to + * @param keys array of null-terminated char arrays representing keys to set + * @param vals array of pointers to values to set + * @param lens array of lengths corresponding to each value + * @param num number of key/value pairs to set + */ +CEPH_RADOS_API void rados_write_op_omap_set(rados_write_op_t write_op, + char const* const* keys, + char const* const* vals, + const size_t *lens, + size_t num); + +/** + * Remove key/value pairs from an object + * + * @param write_op operation to add this action to + * @param keys array of null-terminated char arrays representing keys to remove + * @param keys_len number of key/value pairs to remove + */ +CEPH_RADOS_API void rados_write_op_omap_rm_keys(rados_write_op_t write_op, + char const* const* keys, + size_t keys_len); + +/** + * Remove all key/value pairs from an object + * + * @param write_op operation to add this action to + */ +CEPH_RADOS_API void rados_write_op_omap_clear(rados_write_op_t write_op); + +/** + * Set allocation hint for an object + * + * @param write_op operation to add this action to + * @param expected_object_size expected size of the object, in bytes + * @param expected_write_size expected size of writes to the object, in bytes + */ +CEPH_RADOS_API void rados_write_op_set_alloc_hint(rados_write_op_t write_op, + uint64_t expected_object_size, + uint64_t expected_write_size); + +/** + * Set allocation hint for an object + * + * @param write_op operation to add this action to + * @param expected_object_size expected size of the object, in bytes + * @param expected_write_size expected size of writes to the object, in bytes + * @param flags hints about future IO patterns + */ +CEPH_RADOS_API void rados_write_op_set_alloc_hint2(rados_write_op_t write_op, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags); + +/** + * Perform a write operation synchronously + * @param write_op operation to perform + * @param io the ioctx that the object is in + * @param oid the object id + * @param mtime the time to set the mtime to, NULL for the current time + * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*) + */ +CEPH_RADOS_API int rados_write_op_operate(rados_write_op_t write_op, + rados_ioctx_t io, + const char *oid, + time_t *mtime, + int flags); +/** + * Perform a write operation synchronously + * @param write_op operation to perform + * @param io the ioctx that the object is in + * @param oid the object id + * @param mtime the time to set the mtime to, NULL for the current time + * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*) + */ + +CEPH_RADOS_API int rados_write_op_operate2(rados_write_op_t write_op, + rados_ioctx_t io, + const char *oid, + struct timespec *mtime, + int flags); + +/** + * Perform a write operation asynchronously + * @param write_op operation to perform + * @param io the ioctx that the object is in + * @param completion what to do when operation has been attempted + * @param oid the object id + * @param mtime the time to set the mtime to, NULL for the current time + * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*) + */ +CEPH_RADOS_API int rados_aio_write_op_operate(rados_write_op_t write_op, + rados_ioctx_t io, + rados_completion_t completion, + const char *oid, + time_t *mtime, + int flags); + +/** + * Create a new rados_read_op_t write operation. This will store all + * actions to be performed atomically. You must call + * rados_release_read_op when you are finished with it (after it + * completes, or you decide not to send it in the first place). + * + * @returns non-NULL on success, NULL on memory allocation error. + */ +CEPH_RADOS_API rados_read_op_t rados_create_read_op(void); + +/** + * Free a rados_read_op_t, must be called when you're done with it. + * @param read_op operation to deallocate, created with rados_create_read_op + */ +CEPH_RADOS_API void rados_release_read_op(rados_read_op_t read_op); + +/** + * Set flags for the last operation added to this read_op. + * At least one op must have been added to the read_op. + * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG + */ +CEPH_RADOS_API void rados_read_op_set_flags(rados_read_op_t read_op, int flags); + +/** + * Ensure that the object exists before reading + * @param read_op operation to add this action to + */ +CEPH_RADOS_API void rados_read_op_assert_exists(rados_read_op_t read_op); + +/** + * Ensure that the object exists and that its internal version + * number is equal to "ver" before reading. "ver" should be a + * version number previously obtained with rados_get_last_version(). + * - If the object's version is greater than the asserted version + * then rados_read_op_operate will return -ERANGE instead of + * executing the op. + * - If the object's version is less than the asserted version + * then rados_read_op_operate will return -EOVERFLOW instead + * of executing the op. + * @param read_op operation to add this action to + * @param ver object version number + */ +CEPH_RADOS_API void rados_read_op_assert_version(rados_read_op_t read_op, uint64_t ver); + +/** + * Ensure that given object range (extent) satisfies comparison. + * + * @param read_op operation to add this action to + * @param cmp_buf buffer containing bytes to be compared with object contents + * @param cmp_len length to compare and size of @cmp_buf in bytes + * @param off object byte offset at which to start the comparison + * @param prval returned result of comparison, 0 on success, negative error code + * on failure, (-MAX_ERRNO - mismatch_off) on mismatch + */ +CEPH_RADOS_API void rados_read_op_cmpext(rados_read_op_t read_op, + const char *cmp_buf, + size_t cmp_len, + uint64_t off, + int *prval); + +/** + * Ensure that the an xattr satisfies a comparison + * If the comparison is not satisfied, the return code of the + * operation will be -ECANCELED + * @param read_op operation to add this action to + * @param name name of the xattr to look up + * @param comparison_operator currently undocumented, look for + * LIBRADOS_CMPXATTR_OP_EQ in librados.h + * @param value buffer to compare actual xattr value to + * @param value_len length of buffer to compare actual xattr value to + */ +CEPH_RADOS_API void rados_read_op_cmpxattr(rados_read_op_t read_op, + const char *name, + uint8_t comparison_operator, + const char *value, + size_t value_len); + +/** + * Start iterating over xattrs on an object. + * + * @param read_op operation to add this action to + * @param iter where to store the iterator + * @param prval where to store the return value of this action + */ +CEPH_RADOS_API void rados_read_op_getxattrs(rados_read_op_t read_op, + rados_xattrs_iter_t *iter, + int *prval); + +/** + * Ensure that the an omap value satisfies a comparison, + * with the supplied value on the right hand side (i.e. + * for OP_LT, the comparison is actual_value < value. + * + * @param read_op operation to add this action to + * @param key which omap value to compare + * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ, + LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT + * @param val value to compare with + * @param val_len length of value in bytes + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_read_op_omap_cmp(rados_read_op_t read_op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t val_len, + int *prval); + +/** + * Get object size and mtime + * @param read_op operation to add this action to + * @param psize where to store object size + * @param pmtime where to store modification time + * @param prval where to store the return value of this action + */ +CEPH_RADOS_API void rados_read_op_stat(rados_read_op_t read_op, + uint64_t *psize, + time_t *pmtime, + int *prval); + +/** + * Read bytes from offset into buffer. + * + * prlen will be filled with the number of bytes read if successful. + * A short read can only occur if the read reaches the end of the + * object. + * + * @param read_op operation to add this action to + * @param offset offset to read from + * @param len length of buffer + * @param buffer where to put the data + * @param bytes_read where to store the number of bytes read by this action + * @param prval where to store the return value of this action + */ +CEPH_RADOS_API void rados_read_op_read(rados_read_op_t read_op, + uint64_t offset, + size_t len, + char *buffer, + size_t *bytes_read, + int *prval); + +/** + * Compute checksum from object data + * + * @param read_op operation to add this action to + * @param oid the name of the object to checksum + * @param type the checksum algorithm to utilize + * @param init_value the init value for the algorithm + * @param init_value_len the length of the init value + * @param len the number of bytes to checksum + * @param off the offset to start checksuming in the object + * @param chunk_size optional length-aligned chunk size for checksums + * @param pchecksum where to store the checksum result for this action + * @param checksum_len the number of bytes available for the result + * @param prval where to store the return value for this action + */ +CEPH_RADOS_API void rados_read_op_checksum(rados_read_op_t read_op, + rados_checksum_type_t type, + const char *init_value, + size_t init_value_len, + uint64_t offset, size_t len, + size_t chunk_size, char *pchecksum, + size_t checksum_len, int *prval); + +/** + * Execute an OSD class method on an object + * See rados_exec() for general description. + * + * The output buffer is allocated on the heap; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can all be NULL, in which case they are + * not filled in. + * + * @param read_op operation to add this action to + * @param cls the name of the class + * @param method the name of the method + * @param in_buf where to find input + * @param in_len length of in_buf in bytes + * @param out_buf where to put librados-allocated output buffer + * @param out_len length of out_buf in bytes + * @param prval where to store the return value from the method + */ +CEPH_RADOS_API void rados_read_op_exec(rados_read_op_t read_op, + const char *cls, + const char *method, + const char *in_buf, + size_t in_len, + char **out_buf, + size_t *out_len, + int *prval); + +/** + * Execute an OSD class method on an object + * See rados_exec() for general description. + * + * If the output buffer is too small, prval will + * be set to -ERANGE and used_len will be 0. + * + * @param read_op operation to add this action to + * @param cls the name of the class + * @param method the name of the method + * @param in_buf where to find input + * @param in_len length of in_buf in bytes + * @param out_buf user-provided buffer to read into + * @param out_len length of out_buf in bytes + * @param used_len where to store the number of bytes read into out_buf + * @param prval where to store the return value from the method + */ +CEPH_RADOS_API void rados_read_op_exec_user_buf(rados_read_op_t read_op, + const char *cls, + const char *method, + const char *in_buf, + size_t in_len, + char *out_buf, + size_t out_len, + size_t *used_len, + int *prval); + +/** + * Start iterating over key/value pairs on an object. + * + * They will be returned sorted by key. + * + * @param read_op operation to add this action to + * @param start_after list keys starting after start_after + * @param filter_prefix list only keys beginning with filter_prefix + * @param max_return list no more than max_return key/value pairs + * @param iter where to store the iterator + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_read_op_omap_get_vals(rados_read_op_t read_op, + const char *start_after, + const char *filter_prefix, + uint64_t max_return, + rados_omap_iter_t *iter, + int *prval) + __attribute__((deprecated)); /* use v2 below */ + +/** + * Start iterating over key/value pairs on an object. + * + * They will be returned sorted by key. + * + * @param read_op operation to add this action to + * @param start_after list keys starting after start_after + * @param filter_prefix list only keys beginning with filter_prefix + * @param max_return list no more than max_return key/value pairs + * @param iter where to store the iterator + * @param pmore flag indicating whether there are more keys to fetch + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_read_op_omap_get_vals2(rados_read_op_t read_op, + const char *start_after, + const char *filter_prefix, + uint64_t max_return, + rados_omap_iter_t *iter, + unsigned char *pmore, + int *prval); + +/** + * Start iterating over keys on an object. + * + * They will be returned sorted by key, and the iterator + * will fill in NULL for all values if specified. + * + * @param read_op operation to add this action to + * @param start_after list keys starting after start_after + * @param max_return list no more than max_return keys + * @param iter where to store the iterator + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_read_op_omap_get_keys(rados_read_op_t read_op, + const char *start_after, + uint64_t max_return, + rados_omap_iter_t *iter, + int *prval) + __attribute__((deprecated)); /* use v2 below */ + +/** + * Start iterating over keys on an object. + * + * They will be returned sorted by key, and the iterator + * will fill in NULL for all values if specified. + * + * @param read_op operation to add this action to + * @param start_after list keys starting after start_after + * @param max_return list no more than max_return keys + * @param iter where to store the iterator + * @param pmore flag indicating whether there are more keys to fetch + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_read_op_omap_get_keys2(rados_read_op_t read_op, + const char *start_after, + uint64_t max_return, + rados_omap_iter_t *iter, + unsigned char *pmore, + int *prval); + +/** + * Start iterating over specific key/value pairs + * + * They will be returned sorted by key. + * + * @param read_op operation to add this action to + * @param keys array of pointers to null-terminated keys to get + * @param keys_len the number of strings in keys + * @param iter where to store the iterator + * @param prval where to store the return value from this action + */ +CEPH_RADOS_API void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, + char const* const* keys, + size_t keys_len, + rados_omap_iter_t *iter, + int *prval); + +/** + * Perform a read operation synchronously + * @param read_op operation to perform + * @param io the ioctx that the object is in + * @param oid the object id + * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*) + */ +CEPH_RADOS_API int rados_read_op_operate(rados_read_op_t read_op, + rados_ioctx_t io, + const char *oid, + int flags); + +/** + * Perform a read operation asynchronously + * @param read_op operation to perform + * @param io the ioctx that the object is in + * @param completion what to do when operation has been attempted + * @param oid the object id + * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*) + */ +CEPH_RADOS_API int rados_aio_read_op_operate(rados_read_op_t read_op, + rados_ioctx_t io, + rados_completion_t completion, + const char *oid, + int flags); + +/** @} Object Operations */ + +/** + * Take an exclusive lock on an object. + * + * @param io the context to operate in + * @param oid the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for this instance of the lock + * @param desc user-defined lock description + * @param duration the duration of the lock. Set to NULL for infinite duration. + * @param flags lock flags + * @returns 0 on success, negative error code on failure + * @returns -EBUSY if the lock is already held by another (client, cookie) pair + * @returns -EEXIST if the lock is already held by the same (client, cookie) pair + */ +CEPH_RADOS_API int rados_lock_exclusive(rados_ioctx_t io, const char * oid, + const char * name, const char * cookie, + const char * desc, + struct timeval * duration, + uint8_t flags); + +/** + * Take a shared lock on an object. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for this instance of the lock + * @param tag The tag of the lock + * @param desc user-defined lock description + * @param duration the duration of the lock. Set to NULL for infinite duration. + * @param flags lock flags + * @returns 0 on success, negative error code on failure + * @returns -EBUSY if the lock is already held by another (client, cookie) pair + * @returns -EEXIST if the lock is already held by the same (client, cookie) pair + */ +CEPH_RADOS_API int rados_lock_shared(rados_ioctx_t io, const char * o, + const char * name, const char * cookie, + const char * tag, const char * desc, + struct timeval * duration, uint8_t flags); + +/** + * Release a shared or exclusive lock on an object. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for the instance of the lock + * @returns 0 on success, negative error code on failure + * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair + */ +CEPH_RADOS_API int rados_unlock(rados_ioctx_t io, const char *o, + const char *name, const char *cookie); + +/** + * Asynchronous release a shared or exclusive lock on an object. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for the instance of the lock + * @param completion what to do when operation has been attempted + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_aio_unlock(rados_ioctx_t io, const char *o, + const char *name, const char *cookie, + rados_completion_t completion); + +/** + * List clients that have locked the named object lock and information about + * the lock. + * + * The number of bytes required in each buffer is put in the + * corresponding size out parameter. If any of the provided buffers + * are too short, -ERANGE is returned after these sizes are filled in. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param exclusive where to store whether the lock is exclusive (1) or shared (0) + * @param tag where to store the tag associated with the object lock + * @param tag_len number of bytes in tag buffer + * @param clients buffer in which locker clients are stored, separated by '\0' + * @param clients_len number of bytes in the clients buffer + * @param cookies buffer in which locker cookies are stored, separated by '\0' + * @param cookies_len number of bytes in the cookies buffer + * @param addrs buffer in which locker addresses are stored, separated by '\0' + * @param addrs_len number of bytes in the clients buffer + * @returns number of lockers on success, negative error code on failure + * @returns -ERANGE if any of the buffers are too short + */ +CEPH_RADOS_API ssize_t rados_list_lockers(rados_ioctx_t io, const char *o, + const char *name, int *exclusive, + char *tag, size_t *tag_len, + char *clients, size_t *clients_len, + char *cookies, size_t *cookies_len, + char *addrs, size_t *addrs_len); + +/** + * Releases a shared or exclusive lock on an object, which was taken by the + * specified client. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param client the client currently holding the lock + * @param cookie user-defined identifier for the instance of the lock + * @returns 0 on success, negative error code on failure + * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair + * @returns -EINVAL if the client cannot be parsed + */ +CEPH_RADOS_API int rados_break_lock(rados_ioctx_t io, const char *o, + const char *name, const char *client, + const char *cookie); + +/** + * Blacklists the specified client from the OSDs + * + * @param cluster cluster handle + * @param client_address client address + * @param expire_seconds number of seconds to blacklist (0 for default) + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_blacklist_add(rados_t cluster, + char *client_address, + uint32_t expire_seconds); + +/** + * Enable an application on a pool + * + * @param ioctx pool ioctx + * @param app_name application name + * @param force 0 if only single application per pool + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_application_enable(rados_ioctx_t io, + const char *app_name, int force); + +/** + * List all enabled applications + * + * If the provided buffer is too short, the required length is filled in and + * -ERANGE is returned. Otherwise, the buffers are filled with the application + * names, with a '\0' after each. + * + * @param ioctx pool ioctx + * @param app_name application name + * @param values buffer in which to store application names + * @param vals_len number of bytes in values buffer + * @returns 0 on success, negative error code on failure + * @returns -ERANGE if either buffer is too short + */ +CEPH_RADOS_API int rados_application_list(rados_ioctx_t io, char *values, + size_t *values_len); + +/** + * Get application metadata value from pool + * + * @param ioctx pool ioctx + * @param app_name application name + * @param key metadata key + * @param value result buffer + * @param value_len maximum len of value + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_application_metadata_get(rados_ioctx_t io, + const char *app_name, + const char *key, char *value, + size_t *value_len); + +/** + * Set application metadata on a pool + * + * @param ioctx pool ioctx + * @param app_name application name + * @param key metadata key + * @param value metadata key + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_application_metadata_set(rados_ioctx_t io, + const char *app_name, + const char *key, + const char *value); + +/** + * Remove application metadata from a pool + * + * @param ioctx pool ioctx + * @param app_name application name + * @param key metadata key + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_application_metadata_remove(rados_ioctx_t io, + const char *app_name, + const char *key); + +/** + * List all metadata key/value pairs associated with an application. + * + * This iterates over all metadata, key_len and val_len are filled in + * with the number of bytes put into the keys and values buffers. + * + * If the provided buffers are too short, the required lengths are filled + * in and -ERANGE is returned. Otherwise, the buffers are filled with + * the keys and values of the metadata, with a '\0' after each. + * + * @param ioctx pool ioctx + * @param app_name application name + * @param keys buffer in which to store key names + * @param keys_len number of bytes in keys buffer + * @param values buffer in which to store values + * @param vals_len number of bytes in values buffer + * @returns 0 on succcess, negative error code on failure + * @returns -ERANGE if either buffer is too short + */ +CEPH_RADOS_API int rados_application_metadata_list(rados_ioctx_t io, + const char *app_name, + char *keys, size_t *key_len, + char *values, + size_t *vals_len); + +/** + * @name Mon/OSD/PG Commands + * + * These interfaces send commands relating to the monitor, OSD, or PGs. + * + * @{ + */ + +/** + * Send monitor command. + * + * @note Takes command string in carefully-formatted JSON; must match + * defined commands, types, etc. + * + * The result buffers are allocated on the heap; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can all be NULL, in which case they are + * not filled in. + * + * @param cluster cluster handle + * @param cmd an array of char *'s representing the command + * @param cmdlen count of valid entries in cmd + * @param inbuf any bulk input data (crush map, etc.) + * @param outbuf double pointer to output buffer + * @param outbuflen pointer to output buffer length + * @param outs double pointer to status string + * @param outslen pointer to status string length + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_mon_command(rados_t cluster, const char **cmd, + size_t cmdlen, const char *inbuf, + size_t inbuflen, char **outbuf, + size_t *outbuflen, char **outs, + size_t *outslen); + +/** + * Send ceph-mgr command. + * + * @note Takes command string in carefully-formatted JSON; must match + * defined commands, types, etc. + * + * The result buffers are allocated on the heap; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can all be NULL, in which case they are + * not filled in. + * + * @param cluster cluster handle + * @param cmd an array of char *'s representing the command + * @param cmdlen count of valid entries in cmd + * @param inbuf any bulk input data (crush map, etc.) + * @param outbuf double pointer to output buffer + * @param outbuflen pointer to output buffer length + * @param outs double pointer to status string + * @param outslen pointer to status string length + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_mgr_command(rados_t cluster, const char **cmd, + size_t cmdlen, const char *inbuf, + size_t inbuflen, char **outbuf, + size_t *outbuflen, char **outs, + size_t *outslen); + +/** + * Send monitor command to a specific monitor. + * + * @note Takes command string in carefully-formatted JSON; must match + * defined commands, types, etc. + * + * The result buffers are allocated on the heap; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can all be NULL, in which case they are + * not filled in. + * + * @param cluster cluster handle + * @param name target monitor's name + * @param cmd an array of char *'s representing the command + * @param cmdlen count of valid entries in cmd + * @param inbuf any bulk input data (crush map, etc.) + * @param outbuf double pointer to output buffer + * @param outbuflen pointer to output buffer length + * @param outs double pointer to status string + * @param outslen pointer to status string length + * @returns 0 on success, negative error code on failure + */ +CEPH_RADOS_API int rados_mon_command_target(rados_t cluster, const char *name, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + +/** + * free a rados-allocated buffer + * + * Release memory allocated by librados calls like rados_mon_command(). + * + * @param buf buffer pointer + */ +CEPH_RADOS_API void rados_buffer_free(char *buf); + +CEPH_RADOS_API int rados_osd_command(rados_t cluster, int osdid, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + +CEPH_RADOS_API int rados_pg_command(rados_t cluster, const char *pgstr, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + +CEPH_RADOS_API int rados_mgr_command(rados_t cluster, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + +/* + * This is not a doxygen comment leadin, because doxygen breaks on + * a typedef with function params and returns, and I can't figure out + * how to fix it. + * + * Monitor cluster log + * + * Monitor events logged to the cluster log. The callback get each + * log entry both as a single formatted line and with each field in a + * separate arg. + * + * Calling with a cb argument of NULL will deregister any previously + * registered callback. + * + * @param cluster cluster handle + * @param level minimum log level (debug, info, warn|warning, err|error) + * @param cb callback to run for each log message. It MUST NOT block + * nor call back into librados. + * @param arg void argument to pass to cb + * + * @returns 0 on success, negative code on error + */ +typedef void (*rados_log_callback_t)(void *arg, + const char *line, + const char *who, + uint64_t sec, uint64_t nsec, + uint64_t seq, const char *level, + const char *msg); + +/* + * This is not a doxygen comment leadin, because doxygen breaks on + * a typedef with function params and returns, and I can't figure out + * how to fix it. + * + * Monitor cluster log + * + * Monitor events logged to the cluster log. The callback get each + * log entry both as a single formatted line and with each field in a + * separate arg. + * + * Calling with a cb argument of NULL will deregister any previously + * registered callback. + * + * @param cluster cluster handle + * @param level minimum log level (debug, info, warn|warning, err|error) + * @param cb callback to run for each log message. It MUST NOT block + * nor call back into librados. + * @param arg void argument to pass to cb + * + * @returns 0 on success, negative code on error + */ +typedef void (*rados_log_callback2_t)(void *arg, + const char *line, + const char *channel, + const char *who, + const char *name, + uint64_t sec, uint64_t nsec, + uint64_t seq, const char *level, + const char *msg); + +CEPH_RADOS_API int rados_monitor_log(rados_t cluster, const char *level, + rados_log_callback_t cb, void *arg); +CEPH_RADOS_API int rados_monitor_log2(rados_t cluster, const char *level, + rados_log_callback2_t cb, void *arg); + + +/** + * register daemon instance for a service + * + * Register us as a daemon providing a particular service. We identify + * the service (e.g., 'rgw') and our instance name (e.g., 'rgw.$hostname'). + * The metadata is a map of keys and values with arbitrary static metdata + * for this instance. The encoding is a series of NULL-terminated strings, + * alternating key names and values, terminating with an empty key name. + * For example, "foo\0bar\0this\0that\0\0" is the dict {foo=bar,this=that}. + * + * For the lifetime of the librados instance, regular beacons will be sent + * to the cluster to maintain our registration in the service map. + * + * @param cluster handle + * @param service service name + * @param daemon deamon instance name + * @param metadata_dict static daemon metadata dict + */ +CEPH_RADOS_API int rados_service_register( + rados_t cluster, + const char *service, + const char *daemon, + const char *metadata_dict); + +/** + * update daemon status + * + * Update our mutable status information in the service map. + * + * The status dict is encoded the same way the daemon metadata is encoded + * for rados_service_register. For example, "foo\0bar\0this\0that\0\0" is + * {foo=bar,this=that}. + * + * @param cluster rados cluster handle + * @param status_dict status dict + */ +CEPH_RADOS_API int rados_service_update_status( + rados_t cluster, + const char *status_dict); + +/** @} Mon/OSD/PG commands */ + +/* + * These methods are no longer supported and return -ENOTSUP where possible. + */ +CEPH_RADOS_API int rados_objects_list_open( + rados_ioctx_t io, + rados_list_ctx_t *ctx) __attribute__((deprecated)); +CEPH_RADOS_API uint32_t rados_objects_list_get_pg_hash_position( + rados_list_ctx_t ctx) __attribute__((deprecated)); +CEPH_RADOS_API uint32_t rados_objects_list_seek( + rados_list_ctx_t ctx, + uint32_t pos) __attribute__((deprecated)); +CEPH_RADOS_API int rados_objects_list_next( + rados_list_ctx_t ctx, + const char **entry, + const char **key) __attribute__((deprecated)); +CEPH_RADOS_API void rados_objects_list_close( + rados_list_ctx_t ctx) __attribute__((deprecated)); + + +#ifdef __cplusplus +} +#endif + +#endif