1 #ifndef CEPH_LIBRADOSSTRIPER_H
2 #define CEPH_LIBRADOSSTRIPER_H
10 #include "../rados/librados.h"
/* Individual components of the library version. */
#define LIBRADOSSTRIPER_VER_MAJOR 0
#define LIBRADOSSTRIPER_VER_MINOR 0
#define LIBRADOSSTRIPER_VER_EXTRA 0

/*
 * Pack a (major, minor, extra) triple into one integer: major shifted left
 * by 16, plus minor shifted left by 8, plus extra.
 *
 * Each argument and the whole expansion are parenthesized so that expression
 * arguments such as `a | b` or `c ? x : y` are evaluated as a unit before
 * being shifted and added.
 */
#define LIBRADOSSTRIPER_VERSION(maj, min, extra) \
  (((maj) << 16) + ((min) << 8) + (extra))

/* Packed version code built from the three components above. */
#define LIBRADOSSTRIPER_VERSION_CODE \
  LIBRADOSSTRIPER_VERSION(LIBRADOSSTRIPER_VER_MAJOR, \
                          LIBRADOSSTRIPER_VER_MINOR, \
                          LIBRADOSSTRIPER_VER_EXTRA)
/**
 * @typedef rados_striper_t
 *
 * A handle for interacting with striped objects in a RADOS cluster.
 */
typedef void *rados_striper_t;
28 * @defgroup libradosstriper_h_init Setup and Teardown
29 * These are the first and last functions that should be called
30 * when using libradosstriper.
36 * Creates a rados striper using the given io context
37 * Striper has initially default object layout.
38 * See rados_striper_set_object_layout_*() to change this
40 * @param ioctx the rados context to use
41 * @param striper where to store the rados striper
42 * @returns 0 on success, negative error code on failure
44 int rados_striper_create(rados_ioctx_t ioctx,
45 rados_striper_t *striper);
48 * Destroys a rados striper
50 * @param striper the striper to destroy
52 void rados_striper_destroy(rados_striper_t striper);
55 * Sets the object layout's stripe unit of a rados striper for future objects.
56 * This layout will be used when new objects are created (by writing to them)
57 * Already existing objects will be opened with their own layout.
59 * @param striper the targetted striper
60 * @param stripe_unit the stripe_unit value of the new object layout
61 * @returns 0 on success, negative error code on failure
63 int rados_striper_set_object_layout_stripe_unit(rados_striper_t striper,
64 unsigned int stripe_unit);
67 * Sets the object layout's stripe count of a rados striper for future objects.
68 * This layout will be used when new objects are created (by writing to them)
69 * Already existing objects will be opened with their own layout.
71 * @param striper the targetted striper
72 * @param stripe_count the stripe_count value of the new object layout
73 * @returns 0 on success, negative error code on failure
75 int rados_striper_set_object_layout_stripe_count(rados_striper_t striper,
76 unsigned int stripe_count);
79 * Sets the object layout's object_size of a rados striper for future objects.
80 * This layout will be used when new objects are created (by writing to them)
81 * Already existing objects will be opened with their own layout.
83 * @param striper the targetted striper
84 * @param object_size the object_size value of the new object layout
85 * @returns 0 on success, negative error code on failure
87 int rados_striper_set_object_layout_object_size(rados_striper_t striper,
88 unsigned int object_size);
93 * @defgroup libradosstriper_h_synch_io Synchronous I/O
94 * Writes are striped to several rados objects which are then
95 * replicated to a number of OSDs based on the configuration
96 * of the pool they are in. These write functions block
97 * until data is in memory on all replicas of the object they're
98 * writing to - they are equivalent to doing the corresponding
99 * asynchronous write, and then calling
100 * rados_striper_ioctx_wait_for_complete().
106 * Synchronously write data to a striped object at the specified offset
108 * @param striper the striper in which the write will occur
109 * @param soid the name of the striped object
110 * @param buf data to write
111 * @param len length of the data, in bytes
112 * @param off byte offset in the object to begin writing at
113 * @returns 0 on success, negative error code on failure
116 int rados_striper_write(rados_striper_t striper,
123 * Synchronously write an entire striped object
125 * The striped object is filled with the provided data. If the striped object exists,
126 * it is truncated and then written.
128 * @param striper the striper in which the write will occur
129 * @param soid the name of the striped object
130 * @param buf data to write
131 * @param len length of the data, in bytes
132 * @returns 0 on success, negative error code on failure
134 int rados_striper_write_full(rados_striper_t striper,
140 * Append data to an object
142 * @param striper the striper in which the write will occur
143 * @param soid the name of the striped object
144 * @param buf the data to append
145 * @param len length of buf (in bytes)
146 * @returns 0 on success, negative error code on failure
149 int rados_striper_append(rados_striper_t striper,
155 * Synchronously read data from a striped object at the specified offset
157 * @param striper the striper in which the read will occur
158 * @param soid the name of the striped object
159 * @param buf where to store the results
160 * @param len the number of bytes to read
161 * @param off the offset to start reading from in the object
162 * @returns number of bytes read on success, negative error code on failure
165 int rados_striper_read(rados_striper_t striper,
172 * Synchronously removes a striped object
174 * @note There is no atomicity of the deletion and the striped
175 * object may be left incomplete if an error is returned (metadata
176 * all present, but some stripes missing)
177 * However, there is atomicity of the metadata deletion and
178 * the deletion can not happen if any I/O is ongoing (it
179 * will return EBUSY). Identically, no I/O will be able to start
180 * during deletion (same EBUSY return code)
181 * @param striper the striper in which the remove will occur
182 * @param soid the name of the striped object
183 * @returns 0 on success, negative error code on failure
185 int rados_striper_remove(rados_striper_t striper,
191 * If this enlarges the object, the new area is logically filled with
192 * zeroes. If this shrinks the object, the excess data is removed.
194 * @note the truncation is not fully atomic. The metadata part is,
195 * so the behavior will be atomic from user point of view when
196 * the object size is reduced. However, in case of failure, old data
197 * may stay around, hidden. They may reappear if the object size is
198 * later grown, instead of the expected 0s. When growing the
199 * object and in case of failure, the new 0 data may not be
200 * fully created. This can lead to ENOENT errors when
201 * writing/reading the missing parts.
202 * @note the truncation can not happen if any I/O is ongoing (it
203 * will return EBUSY). Identically, no I/O will be able to start
204 * during truncation (same EBUSY return code)
205 * @param io the rados context to use
206 * @param soid the name of the striped object
207 * @param size the new size of the object in bytes
208 * @returns 0 on success, negative error code on failure
210 int rados_striper_trunc(rados_ioctx_t io, const char *soid, uint64_t size);
212 /** @} Synchronous I/O */
215 * @defgroup libradosstriper_h_xattrs Xattrs
216 * Extended attributes are stored as extended attributes on the
217 * first rados regular object of the striped object.
218 * Thus, they have the same limitations as the underlying
219 * rados extended attributes.
225 * Get the value of an extended attribute on a striped object.
227 * @param striper the striper in which the getxattr will occur
228 * @param oid name of the striped object
229 * @param name which extended attribute to read
230 * @param buf where to store the result
231 * @param len size of buf in bytes
232 * @returns length of xattr value on success, negative error code on failure
234 int rados_striper_getxattr(rados_striper_t striper,
241 * Set an extended attribute on a striped object.
243 * @param striper the striper in which the setxattr will occur
244 * @param oid name of the object
245 * @param name which extended attribute to set
246 * @param buf what to store in the xattr
247 * @param len the number of bytes in buf
248 * @returns 0 on success, negative error code on failure
250 int rados_striper_setxattr(rados_striper_t striper,
257 * Delete an extended attribute from a striped object.
259 * @param striper the striper in which the rmxattr will occur
260 * @param oid name of the object
261 * @param name which xattr to delete
262 * @returns 0 on success, negative error code on failure
264 int rados_striper_rmxattr(rados_striper_t striper,
269 * Start iterating over xattrs on a striped object.
271 * @post iter is a valid iterator
273 * @param striper the striper in which the getxattrs will occur
274 * @param oid name of the object
275 * @param iter where to store the iterator
276 * @returns 0 on success, negative error code on failure
278 int rados_striper_getxattrs(rados_striper_t striper,
280 rados_xattrs_iter_t *iter);
283 * Get the next xattr on the striped object
285 * @pre iter is a valid iterator
287 * @post name is the NULL-terminated name of the next xattr, and val
288 * contains the value of the xattr, which is of length len. If the end
289 * of the list has been reached, name and val are NULL, and len is 0.
291 * @param iter iterator to advance
292 * @param name where to store the name of the next xattr
293 * @param val where to store the value of the next xattr
294 * @param len the number of bytes in val
295 * @returns 0 on success, negative error code on failure
297 int rados_striper_getxattrs_next(rados_xattrs_iter_t iter,
303 * Close the xattr iterator.
305 * iter should not be used after this is called.
307 * @param iter the iterator to close
309 void rados_striper_getxattrs_end(rados_xattrs_iter_t iter);
314 * Synchronously get object stats (size/mtime)
316 * @param striper the striper in which the stat will occur
317 * @param soid the id of the striped object
318 * @param psize where to store object size
319 * @param pmtime where to store modification time
320 * @returns 0 on success, negative error code on failure
322 int rados_striper_stat(rados_striper_t striper,
328 * @defgroup libradosstriper_h_asynch_io Asynchronous I/O
329 * Read and write to objects without blocking.
/**
 * @typedef rados_striper_multi_completion_t
 *
 * Represents the state of a set of asynchronous operations. It contains
 * the aggregated return value once the operations complete, and can be
 * used to block until all operations are complete and/or safe.
 */
typedef void *rados_striper_multi_completion_t;
343 * Constructs a multi completion to use with asynchronous operations
345 * The complete and safe callbacks correspond to operations being
346 * acked and committed, respectively. The callbacks are called in
347 * order of receipt, so the safe callback may be triggered before the
348 * complete callback, and vice versa. This is affected by journalling
351 * @note Read operations only get a complete callback.
352 * @note BUG: this should check for ENOMEM instead of throwing an exception
354 * @param cb_arg application-defined data passed to the callback functions
355 * @param cb_complete the function to be called when the operation is
356 * in memory on all relpicas
357 * @param cb_safe the function to be called when the operation is on
358 * stable storage on all replicas
359 * @param pc where to store the completion
362 int rados_striper_multi_aio_create_completion(void *cb_arg,
363 rados_callback_t cb_complete,
364 rados_callback_t cb_safe,
365 rados_striper_multi_completion_t *pc);
368 * Block until all operation complete
370 * This means data is in memory on all replicas.
372 * @param c operations to wait for
375 void rados_striper_multi_aio_wait_for_complete(rados_striper_multi_completion_t c);
378 * Block until all operation are safe
380 * This means data is on stable storage on all replicas.
382 * @param c operations to wait for
385 void rados_striper_multi_aio_wait_for_safe(rados_striper_multi_completion_t c);
388 * Has a multi asynchronous operation completed?
390 * @warning This does not imply that the complete callback has
393 * @param c async operations to inspect
394 * @returns whether c is complete
396 int rados_striper_multi_aio_is_complete(rados_striper_multi_completion_t c);
399 * Is a multi asynchronous operation safe?
401 * @warning This does not imply that the safe callback has
404 * @param c async operations to inspect
405 * @returns whether c is safe
407 int rados_striper_multi_aio_is_safe(rados_striper_multi_completion_t c);
410 * Block until all operations complete and callback completes
412 * This means data is in memory on all replicas and can be read.
414 * @param c operations to wait for
417 void rados_striper_multi_aio_wait_for_complete_and_cb(rados_striper_multi_completion_t c);
420 * Block until all operations are safe and callback has completed
422 * This means data is on stable storage on all replicas.
424 * @param c operations to wait for
427 void rados_striper_multi_aio_wait_for_safe_and_cb(rados_striper_multi_completion_t c);
430 * Has a multi asynchronous operation and callback completed
432 * @param c async operations to inspect
433 * @returns whether c is complete
435 int rados_striper_multi_aio_is_complete_and_cb(rados_striper_multi_completion_t c);
438 * Is a multi asynchronous operation safe and has the callback completed
440 * @param c async operations to inspect
441 * @returns whether c is safe
443 int rados_striper_multi_aio_is_safe_and_cb(rados_striper_multi_completion_t c);
446 * Get the return value of a multi asychronous operation
448 * The return value is set when all operations are complete or safe,
449 * whichever comes first.
451 * @pre The operation is safe or complete
453 * @note BUG: complete callback may never be called when the safe
454 * message is received before the complete message
456 * @param c async operations to inspect
457 * @returns aggregated return value of the operations
459 int rados_striper_multi_aio_get_return_value(rados_striper_multi_completion_t c);
462 * Release a multi asynchrnous IO completion
464 * Call this when you no longer need the completion. It may not be
465 * freed immediately if the operation is not acked and committed.
467 * @param c multi completion to release
469 void rados_striper_multi_aio_release(rados_striper_multi_completion_t c);
472 * Asynchronously write data to a striped object at the specified offset
474 * The return value of the completion will be 0 on success, negative
475 * error code on failure.
477 * @param striper the striper in which the write will occur
478 * @param soid the name of the striped object
479 * @param completion what to do when the write is safe and complete
480 * @param buf data to write
481 * @param len length of the data, in bytes
482 * @param off byte offset in the object to begin writing at
483 * @returns 0 on success, negative error code on failure
486 int rados_striper_aio_write(rados_striper_t striper,
488 rados_completion_t completion,
494 * Asynchronously appends data to a striped object
496 * The return value of the completion will be 0 on success, negative
497 * error code on failure.
499 * @param striper the striper in which the write will occur
500 * @param soid the name of the striped object
501 * @param completion what to do when the write is safe and complete
502 * @param buf data to write
503 * @param len length of the data, in bytes
504 * @returns 0 on success, negative error code on failure
507 int rados_striper_aio_append(rados_striper_t striper,
509 rados_completion_t completion,
514 * Asynchronously fills an object with the provided data.
515 * If the object exists, it is truncated and then written.
517 * The return value of the completion will be 0 on success, negative
518 * error code on failure.
520 * @param striper the striper in which the write will occur
521 * @param soid the name of the striped object
522 * @param completion what to do when the write is safe and complete
523 * @param buf data to write
524 * @param len length of the data, in bytes
525 * @returns 0 on success, negative error code on failure
528 int rados_striper_aio_write_full(rados_striper_t striper,
530 rados_completion_t completion,
535 * Asynchronously read data from a striped object at the specified offset
537 * The return value of the completion will be number of bytes read on
538 * success, negative error code on failure.
540 * @param striper the striper in which the read will occur
541 * @param soid the name of the striped object
542 * @param completion what to do when the read is safe and complete
543 * @param buf where to store the results
544 * @param len the number of bytes to read
545 * @param off the offset to start reading from in the object
546 * @returns 0 on success, negative error code on failure
549 int rados_striper_aio_read(rados_striper_t striper,
551 rados_completion_t completion,
557 * Asynchronously removes a striped object
559 * @note There is no atomicity of the deletion and the striped
560 * object may be left incomplete if an error is returned (metadata
561 * all present, but some stripes missing)
562 * However, there is atomicity of the metadata deletion and
563 * the deletion can not happen if any I/O is ongoing (it
564 * will return EBUSY). Identically, no I/O will be able to start
565 * during deletion (same EBUSY return code)
566 * @param striper the striper in which the remove will occur
567 * @param soid the name of the striped object
568 * @param completion what to do when the remove is safe and complete
569 * @returns 0 on success, negative error code on failure
572 int rados_striper_aio_remove(rados_striper_t striper,
574 rados_completion_t completion);
577 * Block until all pending writes in a striper are safe
579 * This is not equivalent to calling rados_striper_multi_aio_wait_for_safe() on all
580 * write completions, since this waits for the associated callbacks to
583 * @param striper the striper in which the flush will occur
584 * @returns 0 on success, negative error code on failure
586 void rados_striper_aio_flush(rados_striper_t striper);
589 * Asynchronously get object stats (size/mtime)
591 * @param striper the striper in which the stat will occur
592 * @param soid the id of the striped object
593 * @param psize where to store object size
594 * @param pmtime where to store modification time
595 * @param completion what to do when the stat is complete
596 * @returns 0 on success, negative error code on failure
598 int rados_striper_aio_stat(rados_striper_t striper,
600 rados_completion_t completion,
604 /** @} Asynchronous I/O */