X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=src%2Fceph%2Fsrc%2Fclient%2Ffuse_ll.cc;fp=src%2Fceph%2Fsrc%2Fclient%2Ffuse_ll.cc;h=d24ad5c3451a14a5211988510e313af7efd06589;hb=812ff6ca9fcd3e629e49d4328905f33eee8ca3f5;hp=0000000000000000000000000000000000000000;hpb=15280273faafb77777eab341909a3f495cf248d9;p=stor4nfv.git diff --git a/src/ceph/src/client/fuse_ll.cc b/src/ceph/src/client/fuse_ll.cc new file mode 100644 index 0000000..d24ad5c --- /dev/null +++ b/src/ceph/src/client/fuse_ll.cc @@ -0,0 +1,1259 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ceph +#include "common/errno.h" +#include "common/safe_io.h" +#include "include/types.h" +#include "Client.h" +#include "Fh.h" +#include "ioctl.h" +#include "common/config.h" +#include "include/assert.h" +#include "include/cephfs/ceph_statx.h" + +#include "fuse_ll.h" +#include +#include + +#define dout_context g_ceph_context + +#define FINO_INO(x) ((x) & ((1ull<<48)-1ull)) +#define FINO_STAG(x) ((x) >> 48) +#define MAKE_FINO(i,s) ((i) | ((s) << 48)) + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + +static uint32_t new_encode_dev(dev_t dev) +{ + unsigned major = MAJOR(dev); + unsigned minor = MINOR(dev); + return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); +} + +static dev_t new_decode_dev(uint32_t dev) +{ + unsigned major = (dev & 0xfff00) >> 8; + unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); + return MKDEV(major, minor); +} + +class CephFuse::Handle { +public: + Handle(Client *c, int fd); + ~Handle(); + + int init(int argc, const char *argv[]); + int start(); + int loop(); + void finalize(); + + uint64_t fino_snap(uint64_t fino); + uint64_t make_fake_ino(inodeno_t ino, snapid_t snapid); + Inode * iget(fuse_ino_t fino); + void iput(Inode *in); + + int fd_on_success; + Client *client; + + struct fuse_chan *ch; + struct fuse_session *se; + char *mountpoint; + + Mutex stag_lock; + int last_stag; + + ceph::unordered_map snap_stag_map; + ceph::unordered_map stag_snap_map; + + pthread_key_t fuse_req_key; + void set_fuse_req(fuse_req_t); + fuse_req_t get_fuse_req(); + + struct fuse_args args; +}; + +static int getgroups(fuse_req_t req, gid_t **sgids) +{ +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + assert(sgids); + int c = fuse_req_getgroups(req, 0, NULL); + if (c < 0) { + return c; + } + if (c == 0) { + return 0; + } + + gid_t *gids = new (std::nothrow) gid_t[c]; + if (!gids) { + return -ENOMEM; + } + c = fuse_req_getgroups(req, c, gids); + if (c < 0) { + delete gids; + } else { + *sgids = gids; + } + return c; +#endif + return -ENOSYS; +} + +static int getgroups_cb(void *handle, gid_t **sgids) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *) handle; + fuse_req_t req = cfuse->get_fuse_req(); + return getgroups(req, sgids); +} + +#define GET_GROUPS(perms, req) { \ + if (g_conf->get_val("fuse_set_user_groups")) { \ + gid_t *gids = NULL; \ + int count = getgroups(req, &gids); \ + perms.init_gids(gids, count); \ + perms.take_gids(); \ + } } + + +static CephFuse::Handle *fuse_ll_req_prepare(fuse_req_t req) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + cfuse->set_fuse_req(req); + return cfuse; +} + +static void fuse_ll_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct fuse_entry_param fe; + Inode *i2, *i1 = cfuse->iget(parent); // see below + int r; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + memset(&fe, 0, sizeof(fe)); + r = cfuse->client->ll_lookup(i1, name, &fe.attr, &i2, perms); + if (r >= 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, -r); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); +} + +static void fuse_ll_forget(fuse_req_t req, fuse_ino_t ino, + long unsigned nlookup) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + cfuse->client->ll_forget(cfuse->iget(ino), nlookup+1); + fuse_reply_none(req); +} + +static void fuse_ll_getattr(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + struct stat stbuf; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + (void) fi; // XXX + + if (cfuse->client->ll_getattr(in, &stbuf, perms) + == 0) { + stbuf.st_ino = cfuse->make_fake_ino(stbuf.st_ino, stbuf.st_dev); + stbuf.st_rdev = new_encode_dev(stbuf.st_rdev); + fuse_reply_attr(req, &stbuf, 0); + } else + fuse_reply_err(req, ENOENT); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int to_set, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int mask = 0; + if (to_set & FUSE_SET_ATTR_MODE) mask |= CEPH_SETATTR_MODE; + if (to_set & FUSE_SET_ATTR_UID) mask |= CEPH_SETATTR_UID; + if (to_set & FUSE_SET_ATTR_GID) mask |= CEPH_SETATTR_GID; + if (to_set & FUSE_SET_ATTR_MTIME) mask |= CEPH_SETATTR_MTIME; + if (to_set & FUSE_SET_ATTR_ATIME) mask |= CEPH_SETATTR_ATIME; + if (to_set & FUSE_SET_ATTR_SIZE) mask |= CEPH_SETATTR_SIZE; +#if !defined(DARWIN) + if (to_set & FUSE_SET_ATTR_MTIME_NOW) mask |= CEPH_SETATTR_MTIME_NOW; + if (to_set & FUSE_SET_ATTR_ATIME_NOW) mask |= CEPH_SETATTR_ATIME_NOW; +#endif + + int r = cfuse->client->ll_setattr(in, attr, mask, perms); + if (r == 0) + fuse_reply_attr(req, attr, 0); + else + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +// XATTRS + +static void fuse_ll_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, + int flags +#if defined(DARWIN) + ,uint32_t pos +#endif + ) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_setxattr(in, name, value, size, flags, perms); + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + char buf[size]; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_listxattr(in, buf, size, perms); + if (size == 0 && r >= 0) + fuse_reply_xattr(req, r); + else if (r >= 0) + fuse_reply_buf(req, buf, r); + else + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size +#if defined(DARWIN) + ,uint32_t position +#endif + ) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + char buf[size]; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_getxattr(in, name, buf, size, perms); + if (size == 0 && r >= 0) + fuse_reply_xattr(req, r); + else if (r >= 0) + fuse_reply_buf(req, buf, r); + else + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_removexattr(fuse_req_t req, fuse_ino_t ino, + const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_removexattr(in, name, perms); + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_opendir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + void *dirp; + + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_opendir(in, fi->flags, (dir_result_t **)&dirp, + perms); + if (r >= 0) { + fi->fh = (uint64_t)dirp; + fuse_reply_open(req, fi); + } else { + fuse_reply_err(req, -r); + } + + cfuse->iput(in); // iput required +} + +static void fuse_ll_readlink(fuse_req_t req, fuse_ino_t ino) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + char buf[PATH_MAX + 1]; // leave room for a null terminator + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_readlink(in, buf, sizeof(buf) - 1, perms); + if (r >= 0) { + buf[r] = '\0'; + fuse_reply_readlink(req, buf); + } else { + fuse_reply_err(req, -r); + } + + cfuse->iput(in); // iput required +} + +static void fuse_ll_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, dev_t rdev) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *i2, *i1 = cfuse->iget(parent); + struct fuse_entry_param fe; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + memset(&fe, 0, sizeof(fe)); + + int r = cfuse->client->ll_mknod(i1, name, mode, new_decode_dev(rdev), + &fe.attr, &i2, perms); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, -r); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *i2, *i1; + struct fuse_entry_param fe; + + memset(&fe, 0, sizeof(fe)); + UserPerm perm(ctx->uid, ctx->gid); + GET_GROUPS(perm, req); +#ifdef HAVE_SYS_SYNCFS + if (cfuse->fino_snap(parent) == CEPH_SNAPDIR && + cfuse->client->cct->_conf->fuse_multithreaded && + cfuse->client->cct->_conf->fuse_syncfs_on_mksnap) { + int err = 0; + int fd = ::open(cfuse->mountpoint, O_RDONLY | O_DIRECTORY); + if (fd < 0) { + err = errno; + } else { + int r = ::syncfs(fd); + if (r < 0) + err = errno; + ::close(fd); + } + if (err) { + fuse_reply_err(req, err); + return; + } + } +#endif + + i1 = cfuse->iget(parent); + int r = cfuse->client->ll_mkdir(i1, name, mode, &fe.attr, &i2, perm); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, -r); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(parent); + UserPerm perm(ctx->uid, ctx->gid); + GET_GROUPS(perm, req); + + int r = cfuse->client->ll_unlink(in, name, perm); + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(parent); + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_rmdir(in, name, perms); + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_symlink(fuse_req_t req, const char *existing, + fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *i2, *i1 = cfuse->iget(parent); + struct fuse_entry_param fe; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + memset(&fe, 0, sizeof(fe)); + + int r = cfuse->client->ll_symlink(i1, name, existing, &fe.attr, &i2, perms); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, -r); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_ino_t newparent, const char *newname) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(parent); + Inode *nin = cfuse->iget(newparent); + UserPerm perm(ctx->uid, ctx->gid); + GET_GROUPS(perm, req); + + int r = cfuse->client->ll_rename(in, name, nin, newname, perm); + fuse_reply_err(req, -r); + + cfuse->iput(in); // iputs required + cfuse->iput(nin); +} + +static void fuse_ll_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, + const char *newname) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + Inode *nin = cfuse->iget(newparent); + struct fuse_entry_param fe; + + memset(&fe, 0, sizeof(fe)); + UserPerm perm(ctx->uid, ctx->gid); + GET_GROUPS(perm, req); + + /* + * Note that we could successfully link, but then fail the subsequent + * getattr and return an error. Perhaps we should ignore getattr errors, + * but then how do we tell FUSE that the attrs are bogus? + */ + int r = cfuse->client->ll_link(in, nin, newname, perm); + if (r == 0) { + r = cfuse->client->ll_getattr(in, &fe.attr, perm); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } + } + + if (r != 0) { + /* + * Many ll operations in libcephfs return an extra inode reference, but + * ll_link currently does not. Still, FUSE needs one for the new dentry, + * so we commandeer the reference taken earlier when ll_link is successful. + * On error however, we must put that reference. + */ + cfuse->iput(in); + fuse_reply_err(req, -r); + } + + cfuse->iput(nin); +} + +static void fuse_ll_open(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *in = cfuse->iget(ino); + Fh *fh = NULL; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_open(in, fi->flags, &fh, perms); + if (r == 0) { + fi->fh = (uint64_t)fh; +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + if (cfuse->client->cct->_conf->fuse_disable_pagecache) + fi->direct_io = 1; + else if (cfuse->client->cct->_conf->fuse_use_invalidate_cb) + fi->keep_cache = 1; +#endif + fuse_reply_open(req, fi); + } else { + fuse_reply_err(req, -r); + } + + cfuse->iput(in); // iput required +} + +static void fuse_ll_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + bufferlist bl; + int r = cfuse->client->ll_read(fh, off, size, &bl); + if (r >= 0) + fuse_reply_buf(req, bl.c_str(), bl.length()); + else + fuse_reply_err(req, -r); +} + +static void fuse_ll_write(fuse_req_t req, fuse_ino_t ino, const char *buf, + size_t size, off_t off, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + int r = cfuse->client->ll_write(fh, off, size, buf); + if (r >= 0) + fuse_reply_write(req, r); + else + fuse_reply_err(req, -r); +} + +static void fuse_ll_flush(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + int r = cfuse->client->ll_flush(fh); + fuse_reply_err(req, -r); +} + +#ifdef FUSE_IOCTL_COMPAT +static void fuse_ll_ioctl(fuse_req_t req, fuse_ino_t ino, int cmd, void *arg, struct fuse_file_info *fi, + unsigned flags, const void *in_buf, size_t in_bufsz, size_t out_bufsz) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + + if (flags & FUSE_IOCTL_COMPAT) { + fuse_reply_err(req, ENOSYS); + return; + } + + switch (static_cast(cmd)) { + case CEPH_IOC_GET_LAYOUT: { + file_layout_t layout; + struct ceph_ioctl_layout l; + Fh *fh = (Fh*)fi->fh; + cfuse->client->ll_file_layout(fh, &layout); + l.stripe_unit = layout.stripe_unit; + l.stripe_count = layout.stripe_count; + l.object_size = layout.object_size; + l.data_pool = layout.pool_id; + fuse_reply_ioctl(req, 0, &l, sizeof(struct ceph_ioctl_layout)); + } + break; + default: + fuse_reply_err(req, EINVAL); + } +} +#endif + +#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9) + +static void fuse_ll_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, + off_t offset, off_t length, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = (Fh*)fi->fh; + int r = cfuse->client->ll_fallocate(fh, mode, offset, length); + fuse_reply_err(req, -r); +} + +#endif + +static void fuse_ll_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + int r = cfuse->client->ll_release(fh); + fuse_reply_err(req, -r); +} + +static void fuse_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + int r = cfuse->client->ll_fsync(fh, datasync); + fuse_reply_err(req, -r); +} + +struct readdir_context { + fuse_req_t req; + char *buf; + size_t size; + size_t pos; /* in buf */ + uint64_t snap; +}; + +/* + * return 0 on success, -1 if out of space + */ +static int fuse_ll_add_dirent(void *p, struct dirent *de, + struct ceph_statx *stx, off_t next_off, + Inode *in) +{ + struct readdir_context *c = (struct readdir_context *)p; + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(c->req); + + struct stat st; + st.st_ino = cfuse->make_fake_ino(stx->stx_ino, c->snap); + st.st_mode = stx->stx_mode; + st.st_rdev = new_encode_dev(stx->stx_rdev); + + size_t room = c->size - c->pos; + size_t entrysize = fuse_add_direntry(c->req, c->buf + c->pos, room, + de->d_name, &st, next_off); + if (entrysize > room) + return -ENOSPC; + + /* success */ + c->pos += entrysize; + return 0; +} + +static void fuse_ll_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t off, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + + dir_result_t *dirp = reinterpret_cast(fi->fh); + cfuse->client->seekdir(dirp, off); + + struct readdir_context rc; + rc.req = req; + rc.buf = new char[size]; + rc.size = size; + rc.pos = 0; + rc.snap = cfuse->fino_snap(ino); + + int r = cfuse->client->readdir_r_cb(dirp, fuse_ll_add_dirent, &rc); + if (r == 0 || r == -ENOSPC) /* ignore ENOSPC from our callback */ + fuse_reply_buf(req, rc.buf, rc.pos); + else + fuse_reply_err(req, -r); + delete[] rc.buf; +} + +static void fuse_ll_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + dir_result_t *dirp = reinterpret_cast(fi->fh); + cfuse->client->ll_releasedir(dirp); + fuse_reply_err(req, 0); +} + +static void fuse_ll_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + dir_result_t *dirp = reinterpret_cast(fi->fh); + int r = cfuse->client->ll_fsyncdir(dirp); + fuse_reply_err(req, -r); +} + +static void fuse_ll_access(fuse_req_t req, fuse_ino_t ino, int mask) +{ + fuse_reply_err(req, 0); +} + +static void fuse_ll_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *i1 = cfuse->iget(parent), *i2; + struct fuse_entry_param fe; + Fh *fh = NULL; + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + memset(&fe, 0, sizeof(fe)); + + // pass &i2 for the created inode so that ll_create takes an initial ll_ref + int r = cfuse->client->ll_create(i1, name, mode, fi->flags, &fe.attr, &i2, + &fh, perms); + if (r == 0) { + fi->fh = (uint64_t)fh; + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + if (cfuse->client->cct->_conf->fuse_disable_pagecache) + fi->direct_io = 1; + else if (cfuse->client->cct->_conf->fuse_use_invalidate_cb) + fi->keep_cache = 1; +#endif + fuse_reply_create(req, &fe, fi); + } else + fuse_reply_err(req, -r); + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino) +{ + struct statvfs stbuf; + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Inode *in = cfuse->iget(ino); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + GET_GROUPS(perms, req); + + int r = cfuse->client->ll_statfs(in, &stbuf, perms); + if (r == 0) + fuse_reply_statfs(req, &stbuf); + else + fuse_reply_err(req, -r); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_getlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + + int r = cfuse->client->ll_getlk(fh, lock, fi->lock_owner); + if (r == 0) + fuse_reply_lock(req, lock); + else + fuse_reply_err(req, -r); +} + +static void fuse_ll_setlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock, int sleep) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast(fi->fh); + + // must use multithread if operation may block + if (!cfuse->client->cct->_conf->fuse_multithreaded && + sleep && lock->l_type != F_UNLCK) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_setlk(fh, lock, fi->lock_owner, sleep); + fuse_reply_err(req, -r); +} + +static void fuse_ll_interrupt(fuse_req_t req, void* data) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + cfuse->client->ll_interrupt(data); +} + +static void switch_interrupt_cb(void *handle, void* data) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_req_t req = cfuse->get_fuse_req(); + + if (data) + fuse_req_interrupt_func(req, fuse_ll_interrupt, data); + else + fuse_req_interrupt_func(req, NULL, NULL); +} + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) +static void fuse_ll_flock(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int cmd) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = (Fh*)fi->fh; + + // must use multithread if operation may block + if (!cfuse->client->cct->_conf->fuse_multithreaded && + !(cmd & (LOCK_NB | LOCK_UN))) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_flock(fh, cmd, fi->lock_owner); + fuse_reply_err(req, -r); +} +#endif + +#if !defined(DARWIN) +static mode_t umask_cb(void *handle) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_req_t req = cfuse->get_fuse_req(); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + return ctx->umask; +} +#endif + +static void ino_invalidate_cb(void *handle, vinodeno_t vino, int64_t off, + int64_t len) +{ +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_ino_t fino = cfuse->make_fake_ino(vino.ino, vino.snapid); + fuse_lowlevel_notify_inval_inode(cfuse->ch, fino, off, len); +#endif +} + +static void dentry_invalidate_cb(void *handle, vinodeno_t dirino, + vinodeno_t ino, string& name) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) + fuse_ino_t fino = 0; + if (ino.ino != inodeno_t()) + fino = cfuse->make_fake_ino(ino.ino, ino.snapid); + fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name.c_str(), name.length()); +#elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name.c_str(), name.length()); +#endif +} + +static int remount_cb(void *handle) +{ + // used for trimming kernel dcache. when remounting a file system, linux kernel + // trims all unused dentries in the file system + char cmd[1024]; + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + snprintf(cmd, sizeof(cmd), "mount -i -o remount %s", cfuse->mountpoint); + int r = system(cmd); + if (r != 0 && r != -1) { + r = WEXITSTATUS(r); + } + + return r; +} + +static void do_init(void *data, fuse_conn_info *conn) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)data; + Client *client = cfuse->client; + +#if !defined(DARWIN) + if (!client->cct->_conf->fuse_default_permissions && + client->ll_handle_umask()) { + // apply umask in userspace if posix acl is enabled + if(conn->capable & FUSE_CAP_DONT_MASK) + conn->want |= FUSE_CAP_DONT_MASK; + } +#endif + + if (cfuse->fd_on_success) { + //cout << "fuse init signaling on fd " << fd_on_success << std::endl; + // see Preforker::daemonize(), ceph-fuse's parent process expects a `-1` + // from a daemonized child process. + uint32_t r = -1; + int err = safe_write(cfuse->fd_on_success, &r, sizeof(r)); + if (err) { + derr << "fuse_ll: do_init: safe_write failed with error " + << cpp_strerror(err) << dendl; + ceph_abort(); + } + //cout << "fuse init done signaling on fd " << fd_on_success << std::endl; + + // close stdout, etc. + ::close(0); + ::close(1); + ::close(2); + } +} + +const static struct fuse_lowlevel_ops fuse_ll_oper = { + init: do_init, + destroy: 0, + lookup: fuse_ll_lookup, + forget: fuse_ll_forget, + getattr: fuse_ll_getattr, + setattr: fuse_ll_setattr, + readlink: fuse_ll_readlink, + mknod: fuse_ll_mknod, + mkdir: fuse_ll_mkdir, + unlink: fuse_ll_unlink, + rmdir: fuse_ll_rmdir, + symlink: fuse_ll_symlink, + rename: fuse_ll_rename, + link: fuse_ll_link, + open: fuse_ll_open, + read: fuse_ll_read, + write: fuse_ll_write, + flush: fuse_ll_flush, + release: fuse_ll_release, + fsync: fuse_ll_fsync, + opendir: fuse_ll_opendir, + readdir: fuse_ll_readdir, + releasedir: fuse_ll_releasedir, + fsyncdir: fuse_ll_fsyncdir, + statfs: fuse_ll_statfs, + setxattr: fuse_ll_setxattr, + getxattr: fuse_ll_getxattr, + listxattr: fuse_ll_listxattr, + removexattr: fuse_ll_removexattr, + access: fuse_ll_access, + create: fuse_ll_create, + getlk: fuse_ll_getlk, + setlk: fuse_ll_setlk, + bmap: 0, +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) +#ifdef FUSE_IOCTL_COMPAT + ioctl: fuse_ll_ioctl, +#else + ioctl: 0, +#endif + poll: 0, +#endif +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) + write_buf: 0, + retrieve_reply: 0, + forget_multi: 0, + flock: fuse_ll_flock, +#endif +#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9) + fallocate: fuse_ll_fallocate +#endif +}; + + +CephFuse::Handle::Handle(Client *c, int fd) : + fd_on_success(fd), + client(c), + ch(NULL), + se(NULL), + mountpoint(NULL), + stag_lock("fuse_ll.cc stag_lock"), + last_stag(0) +{ + snap_stag_map[CEPH_NOSNAP] = 0; + stag_snap_map[0] = CEPH_NOSNAP; + memset(&args, 0, sizeof(args)); +} + +CephFuse::Handle::~Handle() +{ + fuse_opt_free_args(&args); +} + +void CephFuse::Handle::finalize() +{ + if (se) + fuse_remove_signal_handlers(se); + if (ch) + fuse_session_remove_chan(ch); + if (se) + fuse_session_destroy(se); + if (ch) + fuse_unmount(mountpoint, ch); + + pthread_key_delete(fuse_req_key); +} + +int CephFuse::Handle::init(int argc, const char *argv[]) +{ + + int r = pthread_key_create(&fuse_req_key, NULL); + if (r) { + derr << "pthread_key_create failed." << dendl; + return r; + } + + // set up fuse argc/argv + int newargc = 0; + const char **newargv = (const char **) malloc((argc + 10) * sizeof(char *)); + if(!newargv) + return ENOMEM; + + newargv[newargc++] = argv[0]; + newargv[newargc++] = "-f"; // stay in foreground + + if (client->cct->_conf->fuse_allow_other) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "allow_other"; + } + if (client->cct->_conf->fuse_default_permissions) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "default_permissions"; + } +#if defined(__linux__) + if (client->cct->_conf->fuse_big_writes) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "big_writes"; + } + if (client->cct->_conf->fuse_atomic_o_trunc) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "atomic_o_trunc"; + } +#endif + if (client->cct->_conf->fuse_debug) + newargv[newargc++] = "-d"; + + for (int argctr = 1; argctr < argc; argctr++) + newargv[newargc++] = argv[argctr]; + + derr << "init, newargv = " << newargv << " newargc=" << newargc << dendl; + struct fuse_args a = FUSE_ARGS_INIT(newargc, (char**)newargv); + args = a; // Roundabout construction b/c FUSE_ARGS_INIT is for initialization not assignment + + if (fuse_parse_cmdline(&args, &mountpoint, NULL, NULL) == -1) { + derr << "fuse_parse_cmdline failed." << dendl; + fuse_opt_free_args(&args); + free(newargv); + return EINVAL; + } + + assert(args.allocated); // Checking fuse has realloc'd args so we can free newargv + free(newargv); + return 0; +} + +int CephFuse::Handle::start() +{ + ch = fuse_mount(mountpoint, &args); + if (!ch) { + derr << "fuse_mount(mountpoint=" << mountpoint << ") failed." << dendl; + return EIO; + } + + se = fuse_lowlevel_new(&args, &fuse_ll_oper, sizeof(fuse_ll_oper), this); + if (!se) { + derr << "fuse_lowlevel_new failed" << dendl; + return EDOM; + } + + signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); + if (fuse_set_signal_handlers(se) == -1) { + derr << "fuse_set_signal_handlers failed" << dendl; + return ENOSYS; + } + + fuse_session_add_chan(se, ch); + + + struct client_callback_args args = { + handle: this, + ino_cb: client->cct->_conf->fuse_use_invalidate_cb ? ino_invalidate_cb : NULL, + dentry_cb: dentry_invalidate_cb, + switch_intr_cb: switch_interrupt_cb, +#if defined(__linux__) + remount_cb: remount_cb, +#endif + getgroups_cb: getgroups_cb, +#if !defined(DARWIN) + umask_cb: umask_cb, +#endif + }; + client->ll_register_callbacks(&args); + + return 0; +} + +int CephFuse::Handle::loop() +{ + if (client->cct->_conf->fuse_multithreaded) { + return fuse_session_loop_mt(se); + } else { + return fuse_session_loop(se); + } +} + +uint64_t CephFuse::Handle::fino_snap(uint64_t fino) +{ + if (fino == FUSE_ROOT_ID) + return CEPH_NOSNAP; + + if (client->use_faked_inos()) { + vinodeno_t vino = client->map_faked_ino(fino); + return vino.snapid; + } else { + Mutex::Locker l(stag_lock); + uint64_t stag = FINO_STAG(fino); + assert(stag_snap_map.count(stag)); + return stag_snap_map[stag]; + } +} + +Inode * CephFuse::Handle::iget(fuse_ino_t fino) +{ + if (fino == FUSE_ROOT_ID) + return client->get_root(); + + if (client->use_faked_inos()) { + return client->ll_get_inode((ino_t)fino); + } else { + vinodeno_t vino(FINO_INO(fino), fino_snap(fino)); + return client->ll_get_inode(vino); + } +} + +void CephFuse::Handle::iput(Inode *in) +{ + client->ll_put(in); +} + +uint64_t CephFuse::Handle::make_fake_ino(inodeno_t ino, snapid_t snapid) +{ + if (client->use_faked_inos()) { + // already faked by libcephfs + if (ino == client->get_root_ino()) + return FUSE_ROOT_ID; + + return ino; + } else { + if (snapid == CEPH_NOSNAP && ino == client->get_root_ino()) + return FUSE_ROOT_ID; + + Mutex::Locker l(stag_lock); + uint64_t stag; + if (snap_stag_map.count(snapid) == 0) { + stag = ++last_stag; + snap_stag_map[snapid] = stag; + stag_snap_map[stag] = snapid; + } else + stag = snap_stag_map[snapid]; + inodeno_t fino = MAKE_FINO(ino, stag); + //cout << "make_fake_ino " << ino << "." << snapid << " -> " << fino << std::endl; + return fino; + } +} + +void CephFuse::Handle::set_fuse_req(fuse_req_t req) +{ + pthread_setspecific(fuse_req_key, (void*)req); +} + +fuse_req_t CephFuse::Handle::get_fuse_req() +{ + return (fuse_req_t) pthread_getspecific(fuse_req_key); +} + + +CephFuse::CephFuse(Client *c, int fd) : _handle(new CephFuse::Handle(c, fd)) +{ +} + +CephFuse::~CephFuse() +{ + delete _handle; +} + +int CephFuse::init(int argc, const char *argv[]) +{ + return _handle->init(argc, argv); +} + +int CephFuse::start() +{ + return _handle->start(); +} + +int CephFuse::loop() +{ + return _handle->loop(); +} + +void CephFuse::finalize() +{ + return _handle->finalize(); +} + +std::string CephFuse::get_mount_point() const +{ + if (_handle->mountpoint) { + return _handle->mountpoint; + } else { + return ""; + } +}