X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;ds=sidebyside;f=kernel%2Fdrivers%2Fblock%2Frbd.c;h=fbdddd6f94b8dcabfca98d0204e65b9c85485da5;hb=3e03ee6596321cb9752868eecdd8d82a508b16d6;hp=010ce0b1f517f3ac00b788c9883296fdb09cd273;hpb=41f827bfbb10e03c4d228fbcc801dd51fb9983b0;p=kvmfornfv.git diff --git a/kernel/drivers/block/rbd.c b/kernel/drivers/block/rbd.c index 010ce0b1f..fbdddd6f9 100644 --- a/kernel/drivers/block/rbd.c +++ b/kernel/drivers/block/rbd.c @@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_MINORS_PER_MAJOR 256 #define RBD_SINGLE_MAJOR_PART_SHIFT 4 +#define RBD_MAX_PARENT_CHAIN_LEN 16 + #define RBD_SNAP_DEV_NAME_PREFIX "snap_" #define RBD_MAX_SNAP_NAME_LEN \ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) @@ -346,6 +348,7 @@ struct rbd_device { struct rbd_image_header header; unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; + struct rbd_options *opts; char *header_name; @@ -415,8 +418,6 @@ MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (d static int rbd_img_request_submit(struct rbd_img_request *img_request); -static void rbd_dev_device_release(struct device *dev); - static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove(struct bus_type *bus, const char *buf, @@ -425,7 +426,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf, size_t count); -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth); static void rbd_spec_put(struct rbd_spec *spec); static int rbd_dev_id_to_minor(int dev_id) @@ -725,34 +726,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) } /* - * mount options + * (Per device) rbd map options */ enum { + Opt_queue_depth, Opt_last_int, /* int args above */ Opt_last_string, /* string args above */ Opt_read_only, Opt_read_write, - /* Boolean args above */ - Opt_last_bool, + Opt_err }; static match_table_t rbd_opts_tokens = { + {Opt_queue_depth, "queue_depth=%d"}, /* int args above */ /* string args above */ {Opt_read_only, "read_only"}, {Opt_read_only, "ro"}, /* Alternate spelling */ {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ - /* Boolean args above */ - {-1, NULL} + {Opt_err, NULL} }; struct rbd_options { + int queue_depth; bool read_only; }; +#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ #define RBD_READ_ONLY_DEFAULT false static int parse_rbd_opts_token(char *c, void *private) @@ -762,27 +765,27 @@ static int parse_rbd_opts_token(char *c, void *private) int token, intval, ret; token = match_token(c, rbd_opts_tokens, argstr); - if (token < 0) - return -EINVAL; - if (token < Opt_last_int) { ret = match_int(&argstr[0], &intval); if (ret < 0) { - pr_err("bad mount option arg (not int) " - "at '%s'\n", c); + pr_err("bad mount option arg (not int) at '%s'\n", c); return ret; } dout("got int token %d val %d\n", token, intval); } else if (token > Opt_last_int && token < Opt_last_string) { - dout("got string token %d val %s\n", token, - argstr[0].from); - } else if (token > Opt_last_string && token < Opt_last_bool) { - dout("got Boolean token %d\n", token); + dout("got string token %d val %s\n", token, argstr[0].from); } else { dout("got token %d\n", token); } switch (token) { + case Opt_queue_depth: + if (intval < 1) { + pr_err("queue_depth out of range\n"); + return -EINVAL; + } + rbd_opts->queue_depth = intval; + break; case Opt_read_only: rbd_opts->read_only = true; break; @@ -790,9 +793,10 @@ static int parse_rbd_opts_token(char *c, void *private) rbd_opts->read_only = false; break; default: - rbd_assert(false); - break; + /* libceph prints "bad option" msg */ + return -EINVAL; } + return 0; } @@ -1564,22 +1568,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request) /* * Wait for an object request to complete. If interrupted, cancel the * underlying osd request. + * + * @timeout: in jiffies, 0 means "wait forever" */ -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request, + unsigned long timeout) { - int ret; + long ret; dout("%s %p\n", __func__, obj_request); - - ret = wait_for_completion_interruptible(&obj_request->completion); - if (ret < 0) { - dout("%s %p interrupted\n", __func__, obj_request); + ret = wait_for_completion_interruptible_timeout( + &obj_request->completion, + ceph_timeout_jiffies(timeout)); + if (ret <= 0) { + if (ret == 0) + ret = -ETIMEDOUT; rbd_obj_request_end(obj_request); - return ret; + } else { + ret = 0; } - dout("%s %p done\n", __func__, obj_request); - return 0; + dout("%s %p ret %d\n", __func__, obj_request, (int)ret); + return ret; +} + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +{ + return __rbd_obj_request_wait(obj_request, 0); +} + +static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request, + unsigned long timeout) +{ + return __rbd_obj_request_wait(obj_request, timeout); } static void rbd_img_request_complete(struct rbd_img_request *img_request) @@ -1842,9 +1863,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_osd_read_callback(obj_request); break; case CEPH_OSD_OP_SETALLOCHINT: - rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE); + rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE || + osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL); /* fall through */ case CEPH_OSD_OP_WRITE: + case CEPH_OSD_OP_WRITEFULL: rbd_osd_write_callback(obj_request); break; case CEPH_OSD_OP_STAT: @@ -1932,7 +1955,7 @@ static struct ceph_osd_request *rbd_osd_req_create( osdc = &rbd_dev->rbd_client->client->osdc; osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, - GFP_ATOMIC); + GFP_NOIO); if (!osd_req) return NULL; /* ENOMEM */ @@ -1981,7 +2004,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) rbd_dev = img_request->rbd_dev; osdc = &rbd_dev->rbd_client->client->osdc; osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops, - false, GFP_ATOMIC); + false, GFP_NOIO); if (!osd_req) return NULL; /* ENOMEM */ @@ -2380,7 +2403,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, opcode = CEPH_OSD_OP_ZERO; } } else if (op_type == OBJ_OP_WRITE) { - opcode = CEPH_OSD_OP_WRITE; + if (!offset && length == object_size) + opcode = CEPH_OSD_OP_WRITEFULL; + else + opcode = CEPH_OSD_OP_WRITE; osd_req_op_alloc_hint_init(osd_request, num_ops, object_size, object_size); num_ops++; @@ -2389,7 +2415,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, } if (opcode == CEPH_OSD_OP_DELETE) - osd_req_op_init(osd_request, num_ops, opcode); + osd_req_op_init(osd_request, num_ops, opcode, 0); else osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length, 0, 0); @@ -2480,7 +2506,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, bio_chain_clone_range(&bio_list, &bio_offset, clone_size, - GFP_ATOMIC); + GFP_NOIO); if (!obj_request->bio_list) goto out_unwind; } else if (type == OBJ_REQUEST_PAGES) { @@ -2860,7 +2886,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) goto out; stat_request->callback = rbd_img_obj_exists_callback; - osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT); + osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0); osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, false, false); rbd_osd_req_format_read(stat_request); @@ -3134,6 +3160,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( bool watch) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_options *opts = osdc->client->options; struct rbd_obj_request *obj_request; int ret; @@ -3160,7 +3187,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper( if (ret) goto out; - ret = rbd_obj_request_wait(obj_request); + ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout); if (ret) goto out; @@ -3415,6 +3442,7 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } img_request->rq = rq; + snapc = NULL; /* img_request consumes a ref */ if (op_type == OBJ_OP_DISCARD) result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, @@ -3452,52 +3480,6 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; } -/* - * a queue callback. Makes sure that we don't create a bio that spans across - * multiple osd objects. One exception would be with a single page bios, - * which we handle later at bio_chain_clone_range() - */ -static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd, - struct bio_vec *bvec) -{ - struct rbd_device *rbd_dev = q->queuedata; - sector_t sector_offset; - sector_t sectors_per_obj; - sector_t obj_sector_offset; - int ret; - - /* - * Find how far into its rbd object the partition-relative - * bio start sector is to offset relative to the enclosing - * device. - */ - sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector; - sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT); - obj_sector_offset = sector_offset & (sectors_per_obj - 1); - - /* - * Compute the number of bytes from that offset to the end - * of the object. Account for what's already used by the bio. - */ - ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT; - if (ret > bmd->bi_size) - ret -= bmd->bi_size; - else - ret = 0; - - /* - * Don't send back more than was asked for. And if the bio - * was empty, let the whole thing through because: "Note - * that a block device *must* allow a single page to be - * added to an empty bio." - */ - rbd_assert(bvec->bv_len <= PAGE_SIZE); - if (ret > (int) bvec->bv_len || !bmd->bi_size) - ret = (int) bvec->bv_len; - - return ret; -} - static void rbd_free_disk(struct rbd_device *rbd_dev) { struct gendisk *disk = rbd_dev->disk; @@ -3762,10 +3744,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); rbd_dev->tag_set.ops = &rbd_mq_ops; - rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; + rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; rbd_dev->tag_set.numa_node = NUMA_NO_NODE; - rbd_dev->tag_set.flags = - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; rbd_dev->tag_set.nr_hw_queues = 1; rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); @@ -3785,6 +3766,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) /* set io sizes to object size */ segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + q->limits.max_sectors = queue_max_hw_sectors(q); + blk_queue_max_segments(q, segment_size / SECTOR_SIZE); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); @@ -3793,10 +3776,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); q->limits.discard_granularity = segment_size; q->limits.discard_alignment = segment_size; - q->limits.max_discard_sectors = segment_size / SECTOR_SIZE; + blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); q->limits.discard_zeroes_data = 1; - blk_queue_merge_bvec(q, rbd_merge_bvec); + if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) + q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + disk->queue = q; q->queuedata = rbd_dev; @@ -4005,14 +3990,12 @@ static const struct attribute_group *rbd_attr_groups[] = { NULL }; -static void rbd_sysfs_dev_release(struct device *dev) -{ -} +static void rbd_dev_release(struct device *dev); static struct device_type rbd_device_type = { .name = "rbd", .groups = rbd_attr_groups, - .release = rbd_sysfs_dev_release, + .release = rbd_dev_release, }; static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) @@ -4055,8 +4038,28 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } +static void rbd_dev_release(struct device *dev) +{ + struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + bool need_put = !!rbd_dev->opts; + + rbd_put_client(rbd_dev->rbd_client); + rbd_spec_put(rbd_dev->spec); + kfree(rbd_dev->opts); + kfree(rbd_dev); + + /* + * This is racy, but way better than putting module outside of + * the release callback. The race window is pretty small, so + * doing something similar to dm (dm-builtin.c) is overkill. + */ + if (need_put) + module_put(THIS_MODULE); +} + static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, - struct rbd_spec *spec) + struct rbd_spec *spec, + struct rbd_options *opts) { struct rbd_device *rbd_dev; @@ -4070,8 +4073,14 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); - rbd_dev->spec = spec; + rbd_dev->dev.bus = &rbd_bus_type; + rbd_dev->dev.type = &rbd_device_type; + rbd_dev->dev.parent = &rbd_root_dev; + device_initialize(&rbd_dev->dev); + rbd_dev->rbd_client = rbdc; + rbd_dev->spec = spec; + rbd_dev->opts = opts; /* Initialize the layout used for all rbd requests */ @@ -4080,14 +4089,21 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + /* + * If this is a mapping rbd_dev (as opposed to a parent one), + * pin our module. We have a ref from do_rbd_add(), so use + * __module_get(). + */ + if (rbd_dev->opts) + __module_get(THIS_MODULE); + return rbd_dev; } static void rbd_dev_destroy(struct rbd_device *rbd_dev) { - rbd_put_client(rbd_dev->rbd_client); - rbd_spec_put(rbd_dev->spec); - kfree(rbd_dev); + if (rbd_dev) + put_device(&rbd_dev->dev); } /* @@ -4695,7 +4711,10 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) } ret = rbd_dev_v2_snap_context(rbd_dev); - dout("rbd_dev_v2_snap_context returned %d\n", ret); + if (ret && first_time) { + kfree(rbd_dev->header.object_prefix); + rbd_dev->header.object_prefix = NULL; + } return ret; } @@ -4710,27 +4729,6 @@ static int rbd_dev_header_info(struct rbd_device *rbd_dev) return rbd_dev_v2_header_info(rbd_dev); } -static int rbd_bus_add_dev(struct rbd_device *rbd_dev) -{ - struct device *dev; - int ret; - - dev = &rbd_dev->dev; - dev->bus = &rbd_bus_type; - dev->type = &rbd_device_type; - dev->parent = &rbd_root_dev; - dev->release = rbd_dev_device_release; - dev_set_name(dev, "%d", rbd_dev->dev_id); - ret = device_register(dev); - - return ret; -} - -static void rbd_bus_del_dev(struct rbd_device *rbd_dev) -{ - device_unregister(&rbd_dev->dev); -} - /* * Get a unique rbd identifier for the given new rbd_dev, and add * the rbd_dev to the global list. @@ -4945,6 +4943,7 @@ static int rbd_add_parse_args(const char *buf, goto out_mem; rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; + rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, @@ -4975,8 +4974,8 @@ out_err: */ static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) { + struct ceph_options *opts = rbdc->client->options; u64 newest_epoch; - unsigned long timeout = rbdc->client->options->mount_timeout * HZ; int tries = 0; int ret; @@ -4991,7 +4990,8 @@ again: if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { ceph_monc_request_next_osdmap(&rbdc->client->monc); (void) ceph_monc_wait_osdmap(&rbdc->client->monc, - newest_epoch, timeout); + newest_epoch, + opts->mount_timeout); goto again; } else { /* the osdmap we have is new enough */ @@ -5142,45 +5142,51 @@ out_err: return ret; } -static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) +/* + * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() -> + * rbd_dev_image_probe() recursion depth, which means it's also the + * length of the already discovered part of the parent chain. + */ +static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth) { struct rbd_device *parent = NULL; - struct rbd_spec *parent_spec; - struct rbd_client *rbdc; int ret; if (!rbd_dev->parent_spec) return 0; - /* - * We need to pass a reference to the client and the parent - * spec when creating the parent rbd_dev. Images related by - * parent/child relationships always share both. - */ - parent_spec = rbd_spec_get(rbd_dev->parent_spec); - rbdc = __rbd_get_client(rbd_dev->rbd_client); - ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec); - if (!parent) + if (++depth > RBD_MAX_PARENT_CHAIN_LEN) { + pr_info("parent chain is too long (%d)\n", depth); + ret = -EINVAL; + goto out_err; + } + + parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec, + NULL); + if (!parent) { + ret = -ENOMEM; goto out_err; + } + + /* + * Images related by parent/child relationships always share + * rbd_client and spec/parent_spec, so bump their refcounts. + */ + __rbd_get_client(rbd_dev->rbd_client); + rbd_spec_get(rbd_dev->parent_spec); - ret = rbd_dev_image_probe(parent, false); + ret = rbd_dev_image_probe(parent, depth); if (ret < 0) goto out_err; + rbd_dev->parent = parent; atomic_set(&rbd_dev->parent_ref, 1); - return 0; + out_err: - if (parent) { - rbd_dev_unparent(rbd_dev); - kfree(rbd_dev->header_name); + rbd_dev_unparent(rbd_dev); + if (parent) rbd_dev_destroy(parent); - } else { - rbd_put_client(rbdc); - rbd_spec_put(parent_spec); - } - return ret; } @@ -5225,7 +5231,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); - ret = rbd_bus_add_dev(rbd_dev); + dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); + ret = device_add(&rbd_dev->dev); if (ret) goto err_out_mapping; @@ -5248,8 +5255,6 @@ err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_id: rbd_dev_id_put(rbd_dev); - rbd_dev_mapping_clear(rbd_dev); - return ret; } @@ -5298,7 +5303,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) * parent), initiate a watch on its header object before using that * object to get detailed information about the rbd image. */ -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { int ret; @@ -5316,7 +5321,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) if (ret) goto err_out_format; - if (mapping) { + if (!depth) { ret = rbd_dev_header_watch_sync(rbd_dev); if (ret) { if (ret == -ENOENT) @@ -5337,7 +5342,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Otherwise this is a parent image, identified by pool, image * and snap ids - need to fill in names for those ids. */ - if (mapping) + if (!depth) ret = rbd_spec_fill_snap_id(rbd_dev); else ret = rbd_spec_fill_names(rbd_dev); @@ -5359,12 +5364,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Need to warn users if this image is the one being * mapped and has a parent. */ - if (mapping && rbd_dev->parent_spec) + if (!depth && rbd_dev->parent_spec) rbd_warn(rbd_dev, "WARNING: kernel layering is EXPERIMENTAL!"); } - ret = rbd_dev_probe_parent(rbd_dev); + ret = rbd_dev_probe_parent(rbd_dev, depth); if (ret) goto err_out_probe; @@ -5375,7 +5380,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: - if (mapping) + if (!depth) rbd_dev_header_unwatch_sync(rbd_dev); out_header_name: kfree(rbd_dev->header_name); @@ -5397,7 +5402,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, struct rbd_spec *spec = NULL; struct rbd_client *rbdc; bool read_only; - int rc = -ENOMEM; + int rc; if (!try_module_get(THIS_MODULE)) return -ENODEV; @@ -5405,10 +5410,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, /* parse add command */ rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); if (rc < 0) - goto err_out_module; - read_only = rbd_opts->read_only; - kfree(rbd_opts); - rbd_opts = NULL; /* done with this */ + goto out; rbdc = rbd_get_client(ceph_opts); if (IS_ERR(rbdc)) { @@ -5434,18 +5436,22 @@ static ssize_t do_rbd_add(struct bus_type *bus, goto err_out_client; } - rbd_dev = rbd_dev_create(rbdc, spec); - if (!rbd_dev) + rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); + if (!rbd_dev) { + rc = -ENOMEM; goto err_out_client; + } rbdc = NULL; /* rbd_dev now owns this */ spec = NULL; /* rbd_dev now owns this */ + rbd_opts = NULL; /* rbd_dev now owns this */ - rc = rbd_dev_image_probe(rbd_dev, true); + rc = rbd_dev_image_probe(rbd_dev, 0); if (rc < 0) goto err_out_rbd_dev; /* If we are mapping a snapshot it must be marked read-only */ + read_only = rbd_dev->opts->read_only; if (rbd_dev->spec->snap_id != CEPH_NOSNAP) read_only = true; rbd_dev->mapping.read_only = read_only; @@ -5459,10 +5465,13 @@ static ssize_t do_rbd_add(struct bus_type *bus, */ rbd_dev_header_unwatch_sync(rbd_dev); rbd_dev_image_release(rbd_dev); - goto err_out_module; + goto out; } - return count; + rc = count; +out: + module_put(THIS_MODULE); + return rc; err_out_rbd_dev: rbd_dev_destroy(rbd_dev); @@ -5470,12 +5479,8 @@ err_out_client: rbd_put_client(rbdc); err_out_args: rbd_spec_put(spec); -err_out_module: - module_put(THIS_MODULE); - - dout("Error adding device %s\n", buf); - - return (ssize_t)rc; + kfree(rbd_opts); + goto out; } static ssize_t rbd_add(struct bus_type *bus, @@ -5495,17 +5500,15 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, return do_rbd_add(bus, buf, count); } -static void rbd_dev_device_release(struct device *dev) +static void rbd_dev_device_release(struct rbd_device *rbd_dev) { - struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - rbd_free_disk(rbd_dev); clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + device_del(&rbd_dev->dev); rbd_dev_mapping_clear(rbd_dev); if (!single_major) unregister_blkdev(rbd_dev->major, rbd_dev->name); rbd_dev_id_put(rbd_dev); - rbd_dev_mapping_clear(rbd_dev); } static void rbd_dev_remove_parent(struct rbd_device *rbd_dev) @@ -5590,9 +5593,8 @@ static ssize_t do_rbd_remove(struct bus_type *bus, * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting * in a potential use after free of rbd_dev->disk or rbd_dev. */ - rbd_bus_del_dev(rbd_dev); + rbd_dev_device_release(rbd_dev); rbd_dev_image_release(rbd_dev); - module_put(THIS_MODULE); return count; } @@ -5663,10 +5665,8 @@ static int rbd_slab_init(void) if (rbd_segment_name_cache) return 0; out_err: - if (rbd_obj_request_cache) { - kmem_cache_destroy(rbd_obj_request_cache); - rbd_obj_request_cache = NULL; - } + kmem_cache_destroy(rbd_obj_request_cache); + rbd_obj_request_cache = NULL; kmem_cache_destroy(rbd_img_request_cache); rbd_img_request_cache = NULL;