X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Frbd_fuse%2Frbd-fuse.cc;fp=src%2Fceph%2Fsrc%2Frbd_fuse%2Frbd-fuse.cc;h=6295097ca7fce5a868943137b012886067960e9c;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/rbd_fuse/rbd-fuse.cc b/src/ceph/src/rbd_fuse/rbd-fuse.cc new file mode 100644 index 0000000..6295097 --- /dev/null +++ b/src/ceph/src/rbd_fuse/rbd-fuse.cc @@ -0,0 +1,888 @@ +/* + * rbd-fuse + */ +#define FUSE_USE_VERSION 30 + +#include "include/int_types.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__FreeBSD__) +#include +#endif + +#include "include/compat.h" +#include "include/rbd/librbd.h" + +static int gotrados = 0; +char *pool_name; +char *mount_image_name; +rados_t cluster; +rados_ioctx_t ioctx; + +std::mutex readdir_lock; + +struct rbd_stat { + u_char valid; + rbd_image_info_t rbd_info; +}; + +struct rbd_options { + char *ceph_config; + char *pool_name; + char *image_name; +}; + +struct rbd_image { + char *image_name; + struct rbd_image *next; +}; +struct rbd_image_data { + struct rbd_image *images; + void *buf; +}; +struct rbd_image_data rbd_image_data; + +struct rbd_openimage { + char *image_name; + rbd_image_t image; + struct rbd_stat rbd_stat; +}; +#define MAX_RBD_IMAGES 128 +struct rbd_openimage opentbl[MAX_RBD_IMAGES]; + +struct rbd_options rbd_options = {(char*) "/etc/ceph/ceph.conf", (char*) "rbd", + NULL}; + +#define rbdsize(fd) opentbl[fd].rbd_stat.rbd_info.size +#define rbdblksize(fd) opentbl[fd].rbd_stat.rbd_info.obj_size +#define rbdblkcnt(fd) opentbl[fd].rbd_stat.rbd_info.num_objs + +uint64_t imagesize = 1024ULL * 1024 * 1024; +uint64_t imageorder = 22ULL; +uint64_t imagefeatures = 1ULL; + +// Minimize calls to rbd_list: marks bracketing of opendir//releasedir +int in_opendir; + +/* prototypes */ +int connect_to_cluster(rados_t *pcluster); +void enumerate_images(struct rbd_image_data *data); +int open_rbd_image(const char *image_name); +int find_openrbd(const char *path); + +void simple_err(const char *msg, int err); + +void +enumerate_images(struct rbd_image_data *data) +{ + struct rbd_image **head = &data->images; + char *ibuf = NULL; + size_t ibuf_len = 0; + struct rbd_image *im, *next; + char *ip; + int ret; + + if (*head != NULL) { + for (im = *head; im != NULL;) { + next = im->next; + free(im); + im = next; + } + *head = NULL; + free(data->buf); + data->buf = NULL; + } + + ret = rbd_list(ioctx, ibuf, &ibuf_len); + if (ret == -ERANGE) { + assert(ibuf_len > 0); + ibuf = (char*) malloc(ibuf_len); + if (!ibuf) { + simple_err("Failed to get ibuf", -ENOMEM); + return; + } + } else if (ret < 0) { + simple_err("Failed to get ibuf_len", ret); + return; + } + + ret = rbd_list(ioctx, ibuf, &ibuf_len); + if (ret < 0) { + simple_err("Failed to populate ibuf", ret); + free(ibuf); + return; + } + assert(ret == (int)ibuf_len); + + fprintf(stderr, "pool %s: ", pool_name); + for (ip = ibuf; ip < &ibuf[ibuf_len]; ip += strlen(ip) + 1) { + if ((mount_image_name == NULL) || + ((strlen(mount_image_name) > 0) && + (strcmp(ip, mount_image_name) == 0))) { + fprintf(stderr, "%s, ", ip); + im = static_cast(malloc(sizeof(*im))); + im->image_name = ip; + im->next = *head; + *head = im; + } + } + fprintf(stderr, "\n"); + data->buf = ibuf; +} + +int +find_openrbd(const char *path) +{ + int i; + + /* find in opentbl[] entry if already open */ + for (i = 0; i < MAX_RBD_IMAGES; i++) { + if ((opentbl[i].image_name != NULL) && + (strcmp(opentbl[i].image_name, path) == 0)) { + return i; + } + } + return -1; +} + +int +open_rbd_image(const char *image_name) +{ + struct rbd_image *im; + struct rbd_openimage *rbd = NULL; + int fd; + + if (image_name == (char *)NULL) + return -1; + + // relies on caller to keep rbd_image_data up to date + for (im = rbd_image_data.images; im != NULL; im = im->next) { + if (strcmp(im->image_name, image_name) == 0) { + break; + } + } + if (im == NULL) + return -1; + + /* find in opentbl[] entry if already open */ + if ((fd = find_openrbd(image_name)) != -1) { + rbd = &opentbl[fd]; + } else { + int i; + // allocate an opentbl[] and open the image + for (i = 0; i < MAX_RBD_IMAGES; i++) { + if (opentbl[i].image == NULL) { + fd = i; + rbd = &opentbl[fd]; + rbd->image_name = strdup(image_name); + break; + } + } + if (i == MAX_RBD_IMAGES || !rbd) + return -1; + int ret = rbd_open(ioctx, rbd->image_name, &(rbd->image), NULL); + if (ret < 0) { + simple_err("open_rbd_image: can't open: ", ret); + return ret; + } + } + rbd_stat(rbd->image, &(rbd->rbd_stat.rbd_info), + sizeof(rbd_image_info_t)); + rbd->rbd_stat.valid = 1; + return fd; +} + +static void +iter_images(void *cookie, + void (*iter)(void *cookie, const char *image)) +{ + struct rbd_image *im; + + readdir_lock.lock(); + + for (im = rbd_image_data.images; im != NULL; im = im->next) + iter(cookie, im->image_name); + readdir_lock.unlock(); +} + +static void count_images_cb(void *cookie, const char *image) +{ + (*((unsigned int *)cookie))++; +} + +static int count_images(void) +{ + unsigned int count = 0; + + readdir_lock.lock(); + enumerate_images(&rbd_image_data); + readdir_lock.unlock(); + + iter_images(&count, count_images_cb); + return count; +} + +extern "C" { + +static int rbdfs_getattr(const char *path, struct stat *stbuf) +{ + int fd; + time_t now; + + if (!gotrados) + return -ENXIO; + + if (path[0] == 0) + return -ENOENT; + + memset(stbuf, 0, sizeof(struct stat)); + + if (strcmp(path, "/") == 0) { + + now = time(NULL); + stbuf->st_mode = S_IFDIR + 0755; + stbuf->st_nlink = 2+count_images(); + stbuf->st_uid = getuid(); + stbuf->st_gid = getgid(); + stbuf->st_size = 1024; + stbuf->st_blksize = 1024; + stbuf->st_blocks = 1; + stbuf->st_atime = now; + stbuf->st_mtime = now; + stbuf->st_ctime = now; + + return 0; + } + + if (!in_opendir) { + readdir_lock.lock(); + enumerate_images(&rbd_image_data); + readdir_lock.unlock(); + } + fd = open_rbd_image(path + 1); + if (fd < 0) + return -ENOENT; + + now = time(NULL); + stbuf->st_mode = S_IFREG | 0666; + stbuf->st_nlink = 1; + stbuf->st_uid = getuid(); + stbuf->st_gid = getgid(); + stbuf->st_size = rbdsize(fd); + stbuf->st_blksize = rbdblksize(fd); + stbuf->st_blocks = rbdblkcnt(fd); + stbuf->st_atime = now; + stbuf->st_mtime = now; + stbuf->st_ctime = now; + + return 0; +} + + +static int rbdfs_open(const char *path, struct fuse_file_info *fi) +{ + int fd; + + if (!gotrados) + return -ENXIO; + + if (path[0] == 0) + return -ENOENT; + + readdir_lock.lock(); + enumerate_images(&rbd_image_data); + readdir_lock.unlock(); + fd = open_rbd_image(path + 1); + if (fd < 0) + return -ENOENT; + + fi->fh = fd; + return 0; +} + +static int rbdfs_read(const char *path, char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + size_t numread; + struct rbd_openimage *rbd; + + if (!gotrados) + return -ENXIO; + + rbd = &opentbl[fi->fh]; + numread = 0; + while (size > 0) { + ssize_t ret; + + ret = rbd_read(rbd->image, offset, size, buf); + + if (ret <= 0) + break; + buf += ret; + size -= ret; + offset += ret; + numread += ret; + } + + return numread; +} + +static int rbdfs_write(const char *path, const char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + size_t numwritten; + struct rbd_openimage *rbd; + + if (!gotrados) + return -ENXIO; + + rbd = &opentbl[fi->fh]; + numwritten = 0; + while (size > 0) { + ssize_t ret; + + if ((size_t)(offset + size) > rbdsize(fi->fh)) { + int r; + fprintf(stderr, "rbdfs_write resizing %s to 0x%" PRIxMAX "\n", + path, offset+size); + r = rbd_resize(rbd->image, offset+size); + if (r < 0) + return r; + + r = rbd_stat(rbd->image, &(rbd->rbd_stat.rbd_info), + sizeof(rbd_image_info_t)); + if (r < 0) + return r; + } + ret = rbd_write(rbd->image, offset, size, buf); + + if (ret < 0) + break; + buf += ret; + size -= ret; + offset += ret; + numwritten += ret; + } + + return numwritten; +} + +static void rbdfs_statfs_image_cb(void *num, const char *image) +{ + int fd; + + ((uint64_t *)num)[0]++; + + fd = open_rbd_image(image); + if (fd >= 0) + ((uint64_t *)num)[1] += rbdsize(fd); +} + +static int rbdfs_statfs(const char *path, struct statvfs *buf) +{ + uint64_t num[2]; + + if (!gotrados) + return -ENXIO; + + num[0] = 1; + num[1] = 0; + readdir_lock.lock(); + enumerate_images(&rbd_image_data); + readdir_lock.unlock(); + iter_images(num, rbdfs_statfs_image_cb); + +#define RBDFS_BSIZE 4096 + buf->f_bsize = RBDFS_BSIZE; + buf->f_frsize = RBDFS_BSIZE; + buf->f_blocks = num[1] / RBDFS_BSIZE; + buf->f_bfree = 0; + buf->f_bavail = 0; + buf->f_files = num[0]; + buf->f_ffree = 0; + buf->f_favail = 0; + buf->f_fsid = 0; + buf->f_flag = 0; + buf->f_namemax = PATH_MAX; + + return 0; +} + +static int rbdfs_fsync(const char *path, int datasync, + struct fuse_file_info *fi) +{ + if (!gotrados) + return -ENXIO; + rbd_flush(opentbl[fi->fh].image); + return 0; +} + +static int rbdfs_opendir(const char *path, struct fuse_file_info *fi) +{ + // only one directory, so global "in_opendir" flag should be fine + readdir_lock.lock(); + in_opendir++; + enumerate_images(&rbd_image_data); + readdir_lock.unlock(); + return 0; +} + +struct rbdfs_readdir_info { + void *buf; + fuse_fill_dir_t filler; +}; + +static void rbdfs_readdir_cb(void *_info, const char *name) +{ + struct rbdfs_readdir_info *info = (struct rbdfs_readdir_info*) _info; + + info->filler(info->buf, name, NULL, 0); +} + +static int rbdfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + struct rbdfs_readdir_info info = { buf, filler }; + + if (!gotrados) + return -ENXIO; + if (!in_opendir) + fprintf(stderr, "in readdir, but not inside opendir?\n"); + + if (strcmp(path, "/") != 0) + return -ENOENT; + + filler(buf, ".", NULL, 0); + filler(buf, "..", NULL, 0); + iter_images(&info, rbdfs_readdir_cb); + + return 0; +} +static int rbdfs_releasedir(const char *path, struct fuse_file_info *fi) +{ + // see opendir comments + readdir_lock.lock(); + in_opendir--; + readdir_lock.unlock(); + return 0; +} + +void * +rbdfs_init(struct fuse_conn_info *conn) +{ + int ret; + + // init cannot fail, so if we fail here, gotrados remains at 0, + // causing other operations to fail immediately with ENXIO + + ret = connect_to_cluster(&cluster); + if (ret < 0) + exit(90); + + pool_name = rbd_options.pool_name; + mount_image_name = rbd_options.image_name; + ret = rados_ioctx_create(cluster, pool_name, &ioctx); + if (ret < 0) + exit(91); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + conn->want |= FUSE_CAP_BIG_WRITES; +#endif + gotrados = 1; + + // init's return value shows up in fuse_context.private_data, + // also to void (*destroy)(void *); useful? + return NULL; +} + +void +rbdfs_destroy(void *unused) +{ + if (!gotrados) + return; + for (int i = 0; i < MAX_RBD_IMAGES; ++i) { + if (opentbl[i].image) { + rbd_close(opentbl[i].image); + opentbl[i].image = NULL; + } + } + rados_ioctx_destroy(ioctx); + rados_shutdown(cluster); +} + +int +rbdfs_checkname(const char *checkname) +{ + const char *extra[] = {"@", "/"}; + std::string strCheckName(checkname); + + if (strCheckName.empty()) + return -EINVAL; + + unsigned int sz = sizeof(extra) / sizeof(const char*); + for (unsigned int i = 0; i < sz; i++) + { + std::string ex(extra[i]); + if (std::string::npos != strCheckName.find(ex)) + return -EINVAL; + } + + return 0; +} + +// return -errno on error. fi->fh is not set until open time + +int +rbdfs_create(const char *path, mode_t mode, struct fuse_file_info *fi) +{ + int r; + int order = imageorder; + + r = rbdfs_checkname(path+1); + if (r != 0) + { + return r; + } + + r = rbd_create2(ioctx, path+1, imagesize, imagefeatures, &order); + return r; +} + +int +rbdfs_rename(const char *path, const char *destname) +{ + int r; + + r = rbdfs_checkname(destname+1); + if (r != 0) + { + return r; + } + + if (strcmp(path, "/") == 0) + return -EINVAL; + + return rbd_rename(ioctx, path+1, destname+1); +} + +int +rbdfs_utime(const char *path, struct utimbuf *utime) +{ + // called on create; not relevant + return 0; +} + +int +rbdfs_unlink(const char *path) +{ + int fd = find_openrbd(path+1); + if (fd != -1) { + struct rbd_openimage *rbd = &opentbl[fd]; + rbd_close(rbd->image); + rbd->image = 0; + free(rbd->image_name); + rbd->rbd_stat.valid = 0; + } + return rbd_remove(ioctx, path+1); +} + + +int +rbdfs_truncate(const char *path, off_t size) +{ + int fd; + int r; + struct rbd_openimage *rbd; + + if ((fd = open_rbd_image(path+1)) < 0) + return -ENOENT; + + rbd = &opentbl[fd]; + fprintf(stderr, "truncate %s to %" PRIdMAX " (0x%" PRIxMAX ")\n", + path, size, size); + r = rbd_resize(rbd->image, size); + if (r < 0) + return r; + + r = rbd_stat(rbd->image, &(rbd->rbd_stat.rbd_info), + sizeof(rbd_image_info_t)); + if (r < 0) + return r; + return 0; +} + +/** + * set an xattr on path, with name/value, length size. + * Presumably flags are from Linux, as in XATTR_CREATE or + * XATTR_REPLACE (both "set", but fail if exist vs fail if not exist. + * + * We accept xattrs only on the root node. + * + * All values converted with strtoull, so can be expressed in any base + */ + +struct rbdfuse_attr { + char *attrname; + uint64_t *attrvalp; +} attrs[] = { + { (char*) "user.rbdfuse.imagesize", &imagesize }, + { (char*) "user.rbdfuse.imageorder", &imageorder }, + { (char*) "user.rbdfuse.imagefeatures", &imagefeatures }, + { NULL, NULL } +}; + +int +rbdfs_setxattr(const char *path, const char *name, const char *value, + size_t size, + int flags +#if defined(DARWIN) + ,uint32_t pos +#endif + ) +{ + struct rbdfuse_attr *ap; + if (strcmp(path, "/") != 0) + return -EINVAL; + + for (ap = attrs; ap->attrname != NULL; ap++) { + if (strcmp(name, ap->attrname) == 0) { + *ap->attrvalp = strtoull(value, NULL, 0); + fprintf(stderr, "rbd-fuse: %s set to 0x%" PRIx64 "\n", + ap->attrname, *ap->attrvalp); + return 0; + } + } + return -EINVAL; +} + +int +rbdfs_getxattr(const char *path, const char *name, char *value, + size_t size +#if defined(DARWIN) + ,uint32_t position +#endif + ) +{ + struct rbdfuse_attr *ap; + char buf[128]; + // allow gets on other files; ls likes to ask for things like + // security.* + + for (ap = attrs; ap->attrname != NULL; ap++) { + if (strcmp(name, ap->attrname) == 0) { + sprintf(buf, "%" PRIu64, *ap->attrvalp); + if (value != NULL && size >= strlen(buf)) + strcpy(value, buf); + fprintf(stderr, "rbd-fuse: get %s\n", ap->attrname); + return (strlen(buf)); + } + } + return 0; +} + +int +rbdfs_listxattr(const char *path, char *list, size_t len) +{ + struct rbdfuse_attr *ap; + size_t required_len = 0; + + if (strcmp(path, "/") != 0) + return -EINVAL; + + for (ap = attrs; ap->attrname != NULL; ap++) + required_len += strlen(ap->attrname) + 1; + if (len >= required_len) { + for (ap = attrs; ap->attrname != NULL; ap++) { + sprintf(list, "%s", ap->attrname); + list += strlen(ap->attrname) + 1; + } + } + return required_len; +} + +const static struct fuse_operations rbdfs_oper = { + getattr: rbdfs_getattr, + readlink: 0, + getdir: 0, + mknod: 0, + mkdir: 0, + unlink: rbdfs_unlink, + rmdir: 0, + symlink: 0, + rename: rbdfs_rename, + link: 0, + chmod: 0, + chown: 0, + truncate: rbdfs_truncate, + utime: rbdfs_utime, + open: rbdfs_open, + read: rbdfs_read, + write: rbdfs_write, + statfs: rbdfs_statfs, + flush: 0, + release: 0, + fsync: rbdfs_fsync, + setxattr: rbdfs_setxattr, + getxattr: rbdfs_getxattr, + listxattr: rbdfs_listxattr, + removexattr: 0, + opendir: rbdfs_opendir, + readdir: rbdfs_readdir, + releasedir: rbdfs_releasedir, + fsyncdir: 0, + init: rbdfs_init, + destroy: rbdfs_destroy, + access: 0, + create: rbdfs_create, + /* skip unimplemented */ +}; + +} /* extern "C" */ + +enum { + KEY_HELP, + KEY_VERSION, + KEY_CEPH_CONFIG, + KEY_CEPH_CONFIG_LONG, + KEY_RADOS_POOLNAME, + KEY_RADOS_POOLNAME_LONG, + KEY_RBD_IMAGENAME, + KEY_RBD_IMAGENAME_LONG +}; + +static struct fuse_opt rbdfs_opts[] = { + FUSE_OPT_KEY("-h", KEY_HELP), + FUSE_OPT_KEY("--help", KEY_HELP), + FUSE_OPT_KEY("-V", KEY_VERSION), + FUSE_OPT_KEY("--version", KEY_VERSION), + {"-c %s", offsetof(struct rbd_options, ceph_config), KEY_CEPH_CONFIG}, + {"--configfile=%s", offsetof(struct rbd_options, ceph_config), + KEY_CEPH_CONFIG_LONG}, + {"-p %s", offsetof(struct rbd_options, pool_name), KEY_RADOS_POOLNAME}, + {"--poolname=%s", offsetof(struct rbd_options, pool_name), + KEY_RADOS_POOLNAME_LONG}, + {"-r %s", offsetof(struct rbd_options, image_name), KEY_RBD_IMAGENAME}, + {"--image=%s", offsetof(struct rbd_options, image_name), + KEY_RBD_IMAGENAME_LONG}, +}; + +static void usage(const char *progname) +{ + fprintf(stderr, +"Usage: %s mountpoint [options]\n" +"\n" +"General options:\n" +" -h --help print help\n" +" -V --version print version\n" +" -c --configfile ceph configuration file [/etc/ceph/ceph.conf]\n" +" -p --poolname rados pool name [rbd]\n" +" -r --image RBD image name\n" +"\n", progname); +} + +static int rbdfs_opt_proc(void *data, const char *arg, int key, + struct fuse_args *outargs) +{ + if (key == KEY_HELP) { + usage(outargs->argv[0]); + fuse_opt_add_arg(outargs, "-ho"); + fuse_main(outargs->argc, outargs->argv, &rbdfs_oper, NULL); + exit(1); + } + + if (key == KEY_VERSION) { + fuse_opt_add_arg(outargs, "--version"); + fuse_main(outargs->argc, outargs->argv, &rbdfs_oper, NULL); + exit(0); + } + + if (key == KEY_CEPH_CONFIG) { + if (rbd_options.ceph_config != NULL) { + free(rbd_options.ceph_config); + rbd_options.ceph_config = NULL; + } + rbd_options.ceph_config = strdup(arg+2); + return 0; + } + + if (key == KEY_RADOS_POOLNAME) { + if (rbd_options.pool_name != NULL) { + free(rbd_options.pool_name); + rbd_options.pool_name = NULL; + } + rbd_options.pool_name = strdup(arg+2); + return 0; + } + + if (key == KEY_RBD_IMAGENAME) { + if (rbd_options.image_name!= NULL) { + free(rbd_options.image_name); + rbd_options.image_name = NULL; + } + rbd_options.image_name = strdup(arg+2); + return 0; + } + + return 1; +} + +void +simple_err(const char *msg, int err) +{ + fprintf(stderr, "%s: %s\n", msg, strerror(-err)); + return; +} + +int +connect_to_cluster(rados_t *pcluster) +{ + int r; + + r = rados_create(pcluster, NULL); + if (r < 0) { + simple_err("Could not create cluster handle", r); + return r; + } + rados_conf_parse_env(*pcluster, NULL); + r = rados_conf_read_file(*pcluster, rbd_options.ceph_config); + if (r < 0) { + simple_err("Error reading Ceph config file", r); + goto failed_shutdown; + } + r = rados_connect(*pcluster); + if (r < 0) { + simple_err("Error connecting to cluster", r); + goto failed_shutdown; + } + + return 0; + +failed_shutdown: + rados_shutdown(*pcluster); + return r; +} + +int main(int argc, char *argv[]) +{ + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + + if (fuse_opt_parse(&args, &rbd_options, rbdfs_opts, rbdfs_opt_proc) == -1) { + exit(1); + } + + return fuse_main(args.argc, args.argv, &rbdfs_oper, NULL); +}