X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=qemu%2Fblock%2Fqcow2.c;fp=qemu%2Fblock%2Fqcow2.c;h=470734be9ff17016778a219d20c6c05deb3d035a;hb=437fd90c0250dee670290f9b714253671a990160;hp=76c331b387e147625f46d83787eaf0f84ee6365f;hpb=5bbd6fe9b8bab2a93e548c5a53b032d1939eec05;p=kvmfornfv.git diff --git a/qemu/block/qcow2.c b/qemu/block/qcow2.c index 76c331b38..470734be9 100644 --- a/qemu/block/qcow2.c +++ b/qemu/block/qcow2.c @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include #include "block/qcow2.h" @@ -34,6 +35,7 @@ #include "qapi-event.h" #include "trace.h" #include "qemu/option_int.h" +#include "qemu/cutils.h" /* Differences with QCOW: @@ -85,7 +87,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uint64_t end_offset, void **p_feature_table, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowExtension ext; uint64_t offset; int ret; @@ -104,7 +106,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, printf("attempting to read extended header in offset %lu\n", offset); #endif - ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); + ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext)); if (ret < 0) { error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " "pread fail from offset %" PRIu64, offset); @@ -132,7 +134,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, sizeof(bs->backing_format)); return 2; } - ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); + ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len); if (ret < 0) { error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " "Could not read format name"); @@ -148,7 +150,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, case QCOW2_EXT_MAGIC_FEATURE_TABLE: if (p_feature_table != NULL) { void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); - ret = bdrv_pread(bs->file, offset , feature_table, ext.len); + ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len); if (ret < 0) { error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " "Could not read table"); @@ -169,7 +171,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uext->len = ext.len; QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); - ret = bdrv_pread(bs->file, offset , uext->data, uext->len); + ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len); if (ret < 0) { error_setg_errno(errp, -ret, "ERROR: unknown extension: " "Could not read data"); @@ -187,7 +189,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, static void cleanup_unknown_header_ext(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; Qcow2UnknownHeaderExtension *uext, *next; QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { @@ -196,22 +198,8 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs) } } -static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs, - Error **errp, const char *fmt, ...) -{ - char msg[64]; - va_list ap; - - va_start(ap, fmt); - vsnprintf(msg, sizeof(msg), fmt, ap); - va_end(ap); - - error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bdrv_get_device_or_node_name(bs), "qcow2", msg); -} - -static void report_unsupported_feature(BlockDriverState *bs, - Error **errp, Qcow2Feature *table, uint64_t mask) +static void report_unsupported_feature(Error **errp, Qcow2Feature *table, + uint64_t mask) { char *features = g_strdup(""); char *old; @@ -236,7 +224,7 @@ static void report_unsupported_feature(BlockDriverState *bs, g_free(old); } - report_unsupported(bs, errp, "%s", features); + error_setg(errp, "Unsupported qcow2 feature(s): %s", features); g_free(features); } @@ -249,7 +237,7 @@ static void report_unsupported_feature(BlockDriverState *bs, */ int qcow2_mark_dirty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t val; int ret; @@ -260,12 +248,12 @@ int qcow2_mark_dirty(BlockDriverState *bs) } val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); - ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), + ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features), &val, sizeof(val)); if (ret < 0) { return ret; } - ret = bdrv_flush(bs->file); + ret = bdrv_flush(bs->file->bs); if (ret < 0) { return ret; } @@ -282,7 +270,7 @@ int qcow2_mark_dirty(BlockDriverState *bs) */ static int qcow2_mark_clean(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { int ret; @@ -304,7 +292,7 @@ static int qcow2_mark_clean(BlockDriverState *bs) */ int qcow2_mark_corrupt(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; return qcow2_update_header(bs); @@ -316,7 +304,7 @@ int qcow2_mark_corrupt(BlockDriverState *bs) */ int qcow2_mark_consistent(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { int ret = bdrv_flush(bs); @@ -351,7 +339,7 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, static int validate_table_offset(BlockDriverState *bs, uint64_t offset, uint64_t entries, size_t entry_len) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t size; /* Use signed INT64_MAX as the maximum even for uint64_t header fields, @@ -467,6 +455,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Maximum refcount block cache size", }, + { + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, + .type = QEMU_OPT_NUMBER, + .help = "Clean unused cache entries after this time (in seconds)", + }, { /* end of list */ } }, }; @@ -482,11 +475,54 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; +static void cache_clean_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + qcow2_cache_clean_unused(bs, s->l2_table_cache); + qcow2_cache_clean_unused(bs, s->refcount_block_cache); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); +} + +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_interval > 0) { + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, + SCALE_MS, cache_clean_timer_cb, + bs); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); + } +} + +static void cache_clean_timer_del(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_timer) { + timer_del(s->cache_clean_timer); + timer_free(s->cache_clean_timer); + s->cache_clean_timer = NULL; + } +} + +static void qcow2_detach_aio_context(BlockDriverState *bs) +{ + cache_clean_timer_del(bs); +} + +static void qcow2_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + cache_clean_timer_init(bs, new_context); +} + static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t combined_cache_size; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; @@ -541,22 +577,246 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, } } +typedef struct Qcow2ReopenState { + Qcow2Cache *l2_table_cache; + Qcow2Cache *refcount_block_cache; + bool use_lazy_refcounts; + int overlap_check; + bool discard_passthrough[QCOW2_DISCARD_MAX]; + uint64_t cache_clean_interval; +} Qcow2ReopenState; + +static int qcow2_update_options_prepare(BlockDriverState *bs, + Qcow2ReopenState *r, + QDict *options, int flags, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QemuOpts *opts = NULL; + const char *opt_overlap_check, *opt_overlap_check_template; + int overlap_check_template = 0; + uint64_t l2_cache_size, refcount_cache_size; + int i; + Error *local_err = NULL; + int ret; + + opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + /* get L2 table/refcount block cache size from command line options */ + read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + l2_cache_size /= s->cluster_size; + if (l2_cache_size < MIN_L2_CACHE_SIZE) { + l2_cache_size = MIN_L2_CACHE_SIZE; + } + if (l2_cache_size > INT_MAX) { + error_setg(errp, "L2 cache size too big"); + ret = -EINVAL; + goto fail; + } + + refcount_cache_size /= s->cluster_size; + if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { + refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; + } + if (refcount_cache_size > INT_MAX) { + error_setg(errp, "Refcount cache size too big"); + ret = -EINVAL; + goto fail; + } + + /* alloc new L2 table/refcount block cache, flush old one */ + if (s->l2_table_cache) { + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret) { + error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); + goto fail; + } + } + + if (s->refcount_block_cache) { + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret) { + error_setg_errno(errp, -ret, + "Failed to flush the refcount block cache"); + goto fail; + } + } + + r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); + r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); + if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { + error_setg(errp, "Could not allocate metadata caches"); + ret = -ENOMEM; + goto fail; + } + + /* New interval for cache cleanup timer */ + r->cache_clean_interval = + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, + s->cache_clean_interval); + if (r->cache_clean_interval > UINT_MAX) { + error_setg(errp, "Cache clean interval too big"); + ret = -EINVAL; + goto fail; + } + + /* lazy-refcounts; flush if going from enabled to disabled */ + r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, + (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); + if (r->use_lazy_refcounts && s->qcow_version < 3) { + error_setg(errp, "Lazy refcounts require a qcow2 image with at least " + "qemu 1.1 compatibility level"); + ret = -EINVAL; + goto fail; + } + + if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { + ret = qcow2_mark_clean(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); + goto fail; + } + } + + /* Overlap check options */ + opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); + opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); + if (opt_overlap_check_template && opt_overlap_check && + strcmp(opt_overlap_check_template, opt_overlap_check)) + { + error_setg(errp, "Conflicting values for qcow2 options '" + QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE + "' ('%s')", opt_overlap_check, opt_overlap_check_template); + ret = -EINVAL; + goto fail; + } + if (!opt_overlap_check) { + opt_overlap_check = opt_overlap_check_template ?: "cached"; + } + + if (!strcmp(opt_overlap_check, "none")) { + overlap_check_template = 0; + } else if (!strcmp(opt_overlap_check, "constant")) { + overlap_check_template = QCOW2_OL_CONSTANT; + } else if (!strcmp(opt_overlap_check, "cached")) { + overlap_check_template = QCOW2_OL_CACHED; + } else if (!strcmp(opt_overlap_check, "all")) { + overlap_check_template = QCOW2_OL_ALL; + } else { + error_setg(errp, "Unsupported value '%s' for qcow2 option " + "'overlap-check'. Allowed are any of the following: " + "none, constant, cached, all", opt_overlap_check); + ret = -EINVAL; + goto fail; + } + + r->overlap_check = 0; + for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { + /* overlap-check defines a template bitmask, but every flag may be + * overwritten through the associated boolean option */ + r->overlap_check |= + qemu_opt_get_bool(opts, overlap_bool_option_names[i], + overlap_check_template & (1 << i)) << i; + } + + r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; + r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; + r->discard_passthrough[QCOW2_DISCARD_REQUEST] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, + flags & BDRV_O_UNMAP); + r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); + r->discard_passthrough[QCOW2_DISCARD_OTHER] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); + + ret = 0; +fail: + qemu_opts_del(opts); + opts = NULL; + return ret; +} + +static void qcow2_update_options_commit(BlockDriverState *bs, + Qcow2ReopenState *r) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + if (s->l2_table_cache) { + qcow2_cache_destroy(bs, s->l2_table_cache); + } + if (s->refcount_block_cache) { + qcow2_cache_destroy(bs, s->refcount_block_cache); + } + s->l2_table_cache = r->l2_table_cache; + s->refcount_block_cache = r->refcount_block_cache; + + s->overlap_check = r->overlap_check; + s->use_lazy_refcounts = r->use_lazy_refcounts; + + for (i = 0; i < QCOW2_DISCARD_MAX; i++) { + s->discard_passthrough[i] = r->discard_passthrough[i]; + } + + if (s->cache_clean_interval != r->cache_clean_interval) { + cache_clean_timer_del(bs); + s->cache_clean_interval = r->cache_clean_interval; + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); + } +} + +static void qcow2_update_options_abort(BlockDriverState *bs, + Qcow2ReopenState *r) +{ + if (r->l2_table_cache) { + qcow2_cache_destroy(bs, r->l2_table_cache); + } + if (r->refcount_block_cache) { + qcow2_cache_destroy(bs, r->refcount_block_cache); + } +} + +static int qcow2_update_options(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + Qcow2ReopenState r = {}; + int ret; + + ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); + if (ret >= 0) { + qcow2_update_options_commit(bs, &r); + } else { + qcow2_update_options_abort(bs, &r); + } + + return ret; +} + static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int len, i; int ret = 0; QCowHeader header; - QemuOpts *opts = NULL; Error *local_err = NULL; uint64_t ext_end; uint64_t l1_vm_state_index; - const char *opt_overlap_check, *opt_overlap_check_template; - int overlap_check_template = 0; - uint64_t l2_cache_size, refcount_cache_size; - ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read qcow2 header"); goto fail; @@ -581,7 +841,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } if (header.version < 2 || header.version > 3) { - report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version); + error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); ret = -ENOTSUP; goto fail; } @@ -631,7 +891,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (header.header_length > sizeof(header)) { s->unknown_header_fields_size = header.header_length - sizeof(header); s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); - ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, + ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields, s->unknown_header_fields_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " @@ -661,7 +921,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, void *feature_table = NULL; qcow2_read_extensions(bs, header.header_length, ext_end, &feature_table, NULL); - report_unsupported_feature(bs, errp, feature_table, + report_unsupported_feature(errp, feature_table, s->incompatible_features & ~QCOW2_INCOMPAT_MASK); ret = -ENOTSUP; @@ -705,6 +965,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) { + if (bdrv_uses_whitelist() && + s->crypt_method_header == QCOW_CRYPT_AES) { + error_report("qcow2 built-in AES encryption is deprecated"); + error_printf("Support for it will be removed in a future release.\n" + "You can use 'qemu-img convert' to switch to an\n" + "unencrypted qcow2 image, or a LUKS raw image.\n"); + } + bs->encrypted = 1; } @@ -784,14 +1052,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (s->l1_size > 0) { - s->l1_table = qemu_try_blockalign(bs->file, + s->l1_table = qemu_try_blockalign(bs->file->bs, align_offset(s->l1_size * sizeof(uint64_t), 512)); if (s->l1_table == NULL) { error_setg(errp, "Could not allocate L1 table"); ret = -ENOMEM; goto fail; } - ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, + ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read L1 table"); @@ -802,55 +1070,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } } - /* get L2 table/refcount block cache size from command line options */ - opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; - } - - read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; - } - - l2_cache_size /= s->cluster_size; - if (l2_cache_size < MIN_L2_CACHE_SIZE) { - l2_cache_size = MIN_L2_CACHE_SIZE; - } - if (l2_cache_size > INT_MAX) { - error_setg(errp, "L2 cache size too big"); - ret = -EINVAL; - goto fail; - } - - refcount_cache_size /= s->cluster_size; - if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { - refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; - } - if (refcount_cache_size > INT_MAX) { - error_setg(errp, "Refcount cache size too big"); - ret = -EINVAL; - goto fail; - } - - /* alloc L2 table/refcount block cache */ - s->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); - s->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); - if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) { - error_setg(errp, "Could not allocate metadata caches"); - ret = -ENOMEM; + /* Parse driver-specific options */ + ret = qcow2_update_options(bs, options, flags, errp); + if (ret < 0) { goto fail; } s->cluster_cache = g_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ - s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS + s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512); if (s->cluster_data == NULL) { error_setg(errp, "Could not allocate temporary cluster buffer"); @@ -887,7 +1115,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - ret = bdrv_pread(bs->file, header.backing_file_offset, + ret = bdrv_pread(bs->file->bs, header.backing_file_offset, bs->backing_file, len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read backing file name"); @@ -908,7 +1136,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } /* Clear unknown autoclear feature bits */ - if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) { + if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) { s->autoclear_features = 0; ret = qcow2_update_header(bs); if (ret < 0) { @@ -921,7 +1149,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(&s->lock); /* Repair image if dirty */ - if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only && + if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; @@ -932,70 +1160,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } } - /* Enable lazy_refcounts according to image and command line options */ - s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, - (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); - - s->discard_passthrough[QCOW2_DISCARD_NEVER] = false; - s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; - s->discard_passthrough[QCOW2_DISCARD_REQUEST] = - qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, - flags & BDRV_O_UNMAP); - s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = - qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); - s->discard_passthrough[QCOW2_DISCARD_OTHER] = - qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); - - opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); - opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); - if (opt_overlap_check_template && opt_overlap_check && - strcmp(opt_overlap_check_template, opt_overlap_check)) - { - error_setg(errp, "Conflicting values for qcow2 options '" - QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE - "' ('%s')", opt_overlap_check, opt_overlap_check_template); - ret = -EINVAL; - goto fail; - } - if (!opt_overlap_check) { - opt_overlap_check = opt_overlap_check_template ?: "cached"; - } - - if (!strcmp(opt_overlap_check, "none")) { - overlap_check_template = 0; - } else if (!strcmp(opt_overlap_check, "constant")) { - overlap_check_template = QCOW2_OL_CONSTANT; - } else if (!strcmp(opt_overlap_check, "cached")) { - overlap_check_template = QCOW2_OL_CACHED; - } else if (!strcmp(opt_overlap_check, "all")) { - overlap_check_template = QCOW2_OL_ALL; - } else { - error_setg(errp, "Unsupported value '%s' for qcow2 option " - "'overlap-check'. Allowed are either of the following: " - "none, constant, cached, all", opt_overlap_check); - ret = -EINVAL; - goto fail; - } - - s->overlap_check = 0; - for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { - /* overlap-check defines a template bitmask, but every flag may be - * overwritten through the associated boolean option */ - s->overlap_check |= - qemu_opt_get_bool(opts, overlap_bool_option_names[i], - overlap_check_template & (1 << i)) << i; - } - - qemu_opts_del(opts); - opts = NULL; - - if (s->use_lazy_refcounts && s->qcow_version < 3) { - error_setg(errp, "Lazy refcounts require a qcow2 image with at least " - "qemu 1.1 compatibility level"); - ret = -EINVAL; - goto fail; - } - #ifdef DEBUG_ALLOC { BdrvCheckResult result = {0}; @@ -1005,7 +1169,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, return ret; fail: - qemu_opts_del(opts); g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); qcow2_free_snapshots(bs); @@ -1013,6 +1176,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; + cache_clean_timer_del(bs); if (s->l2_table_cache) { qcow2_cache_destroy(bs, s->l2_table_cache); } @@ -1026,14 +1190,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bs->bl.write_zeroes_alignment = s->cluster_sectors; } static int qcow2_set_key(BlockDriverState *bs, const char *key) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint8_t keybuf[16]; int len, i; Error *err = NULL; @@ -1066,32 +1230,104 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key) return 0; } -/* We have no actual commit/abort logic for qcow2, but we need to write out any - * unwritten data if we reopen read-only. */ static int qcow2_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { + Qcow2ReopenState *r; int ret; + r = g_new0(Qcow2ReopenState, 1); + state->opaque = r; + + ret = qcow2_update_options_prepare(state->bs, r, state->options, + state->flags, errp); + if (ret < 0) { + goto fail; + } + + /* We need to write out any unwritten data if we reopen read-only. */ if ((state->flags & BDRV_O_RDWR) == 0) { ret = bdrv_flush(state->bs); if (ret < 0) { - return ret; + goto fail; } ret = qcow2_mark_clean(state->bs); if (ret < 0) { - return ret; + goto fail; } } return 0; + +fail: + qcow2_update_options_abort(state->bs, r); + g_free(r); + return ret; +} + +static void qcow2_reopen_commit(BDRVReopenState *state) +{ + qcow2_update_options_commit(state->bs, state->opaque); + g_free(state->opaque); +} + +static void qcow2_reopen_abort(BDRVReopenState *state) +{ + qcow2_update_options_abort(state->bs, state->opaque); + g_free(state->opaque); +} + +static void qcow2_join_options(QDict *options, QDict *old_options) +{ + bool has_new_overlap_template = + qdict_haskey(options, QCOW2_OPT_OVERLAP) || + qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); + bool has_new_total_cache_size = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); + bool has_all_cache_options; + + /* New overlap template overrides all old overlap options */ + if (has_new_overlap_template) { + qdict_del(old_options, QCOW2_OPT_OVERLAP); + qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); + qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); + } + + /* New total cache size overrides all old options */ + if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { + qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); + qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + } + + qdict_join(options, old_options, false); + + /* + * If after merging all cache size options are set, an old total size is + * overwritten. Do keep all options, however, if all three are new. The + * resulting error message is what we want to happen. + */ + has_all_cache_options = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + + if (has_all_cache_options && !has_new_total_cache_size) { + qdict_del(options, QCOW2_OPT_CACHE_SIZE); + } } static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; int index_in_cluster, ret; int64_t status = 0; @@ -1108,6 +1344,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, !s->cipher) { index_in_cluster = sector_num & (s->cluster_sectors - 1); cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + *file = bs->file->bs; status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; } if (ret == QCOW2_CLUSTER_ZERO) { @@ -1138,7 +1375,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int index_in_cluster, n1; int ret; int cur_nr_sectors; /* number of sectors in current iteration */ @@ -1175,9 +1412,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, switch (ret) { case QCOW2_CLUSTER_UNALLOCATED: - if (bs->backing_hd) { + if (bs->backing) { /* read from the base image */ - n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, + n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, sector_num, cur_nr_sectors); if (n1 > 0) { QEMUIOVector local_qiov; @@ -1188,7 +1425,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->backing_hd, sector_num, + ret = bdrv_co_readv(bs->backing->bs, sector_num, n1, &local_qiov); qemu_co_mutex_lock(&s->lock); @@ -1235,8 +1472,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, */ if (!cluster_data) { cluster_data = - qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS - * s->cluster_size); + qemu_try_blockalign(bs->file->bs, + QCOW_MAX_CRYPT_CLUSTERS + * s->cluster_size); if (cluster_data == NULL) { ret = -ENOMEM; goto fail; @@ -1252,7 +1490,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file, + ret = bdrv_co_readv(bs->file->bs, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -1300,7 +1538,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int remaining_sectors, QEMUIOVector *qiov) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int index_in_cluster; int ret; int cur_nr_sectors; /* number of sectors in current iteration */ @@ -1349,7 +1587,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, Error *err = NULL; assert(s->cipher); if (!cluster_data) { - cluster_data = qemu_try_blockalign(bs->file, + cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); if (cluster_data == NULL) { @@ -1386,7 +1624,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), (cluster_offset >> 9) + index_in_cluster); - ret = bdrv_co_writev(bs->file, + ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -1444,33 +1682,44 @@ fail: return ret; } +static int qcow2_inactivate(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int ret, result = 0; + + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret) { + result = ret; + error_report("Failed to flush the L2 table cache: %s", + strerror(-ret)); + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret) { + result = ret; + error_report("Failed to flush the refcount block cache: %s", + strerror(-ret)); + } + + if (result == 0) { + qcow2_mark_clean(bs); + } + + return result; +} + static void qcow2_close(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; - if (!(bs->open_flags & BDRV_O_INCOMING)) { - int ret1, ret2; - - ret1 = qcow2_cache_flush(bs, s->l2_table_cache); - ret2 = qcow2_cache_flush(bs, s->refcount_block_cache); - - if (ret1) { - error_report("Failed to flush the L2 table cache: %s", - strerror(-ret1)); - } - if (ret2) { - error_report("Failed to flush the refcount block cache: %s", - strerror(-ret2)); - } - - if (!ret1 && !ret2) { - qcow2_mark_clean(bs); - } + if (!(s->flags & BDRV_O_INACTIVE)) { + qcow2_inactivate(bs); } + cache_clean_timer_del(bs); qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); @@ -1491,7 +1740,7 @@ static void qcow2_close(BlockDriverState *bs) static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int flags = s->flags; QCryptoCipher *cipher = NULL; QDict *options; @@ -1508,24 +1757,27 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) qcow2_close(bs); - bdrv_invalidate_cache(bs->file, &local_err); + bdrv_invalidate_cache(bs->file->bs, &local_err); if (local_err) { error_propagate(errp, local_err); + bs->drv = NULL; return; } - memset(s, 0, sizeof(BDRVQcowState)); + memset(s, 0, sizeof(BDRVQcow2State)); options = qdict_clone_shallow(bs->options); + flags &= ~BDRV_O_INACTIVE; ret = qcow2_open(bs, options, flags, &local_err); QDECREF(options); if (local_err) { - error_setg(errp, "Could not reopen qcow2 layer: %s", - error_get_pretty(local_err)); - error_free(local_err); + error_propagate(errp, local_err); + error_prepend(errp, "Could not reopen qcow2 layer: "); + bs->drv = NULL; return; } else if (ret < 0) { error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); + bs->drv = NULL; return; } @@ -1561,7 +1813,7 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s, */ int qcow2_update_header(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowHeader *header; char *buf; size_t buflen = s->cluster_size; @@ -1653,31 +1905,33 @@ int qcow2_update_header(BlockDriverState *bs) } /* Feature table */ - Qcow2Feature features[] = { - { - .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, - .bit = QCOW2_INCOMPAT_DIRTY_BITNR, - .name = "dirty bit", - }, - { - .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, - .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, - .name = "corrupt bit", - }, - { - .type = QCOW2_FEAT_TYPE_COMPATIBLE, - .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, - .name = "lazy refcounts", - }, - }; + if (s->qcow_version >= 3) { + Qcow2Feature features[] = { + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_DIRTY_BITNR, + .name = "dirty bit", + }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, + .name = "corrupt bit", + }, + { + .type = QCOW2_FEAT_TYPE_COMPATIBLE, + .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, + .name = "lazy refcounts", + }, + }; - ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, - features, sizeof(features), buflen); - if (ret < 0) { - goto fail; + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, + features, sizeof(features), buflen); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; } - buf += ret; - buflen -= ret; /* Keep unknown header extensions */ QLIST_FOREACH(uext, &s->unknown_header_ext, next) { @@ -1716,7 +1970,7 @@ int qcow2_update_header(BlockDriverState *bs) } /* Write the new header */ - ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); + ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size); if (ret < 0) { goto fail; } @@ -1730,7 +1984,11 @@ fail: static int qcow2_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; + + if (backing_file && strlen(backing_file) > 1023) { + return -EINVAL; + } pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); @@ -1796,7 +2054,8 @@ static int preallocate(BlockDriverState *bs) if (host_offset != 0) { uint8_t buf[BDRV_SECTOR_SIZE]; memset(buf, 0, BDRV_SECTOR_SIZE); - ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1, + ret = bdrv_write(bs->file->bs, + (host_offset >> BDRV_SECTOR_BITS) + num - 1, buf, 1); if (ret < 0) { return ret; @@ -1812,8 +2071,10 @@ static int qcow2_create2(const char *filename, int64_t total_size, QemuOpts *opts, int version, int refcount_order, Error **errp) { - /* Calculate cluster_bits */ int cluster_bits; + QDict *options; + + /* Calculate cluster_bits */ cluster_bits = ctz32(cluster_size); if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || (1 << cluster_bits) != cluster_size) @@ -1835,7 +2096,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file * size for any qcow2 image. */ - BlockDriverState* bs; + BlockBackend *blk; QCowHeader *header; uint64_t* refcount_table; Error *local_err = NULL; @@ -1910,14 +2171,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); - if (ret < 0) { + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* Write the header */ QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); header = g_malloc0(cluster_size); @@ -1945,7 +2207,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); } - ret = bdrv_pwrite(bs, 0, header, cluster_size); + ret = blk_pwrite(blk, 0, header, cluster_size); g_free(header); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write qcow2 header"); @@ -1955,7 +2217,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Write a refcount table with one refcount block */ refcount_table = g_malloc0(2 * cluster_size); refcount_table[0] = cpu_to_be64(2 * cluster_size); - ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size); + ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size); g_free(refcount_table); if (ret < 0) { @@ -1963,23 +2225,25 @@ static int qcow2_create2(const char *filename, int64_t total_size, goto out; } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* * And now open the image and make it consistent first (i.e. increase the * refcount of the cluster that is occupied by the header and the refcount * table) */ - ret = bdrv_open(&bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - &bdrv_qcow2, &local_err); - if (ret < 0) { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow2")); + blk = blk_new_open(filename, NULL, options, + BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } - ret = qcow2_alloc_clusters(bs, 3 * cluster_size); + ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " "header and refcount table"); @@ -1990,8 +2254,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, abort(); } + /* Create a full header (including things like feature table) */ + ret = qcow2_update_header(blk_bs(blk)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not update qcow2 header"); + goto out; + } + /* Okay, now that we have a valid image, let's give it the right size */ - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not resize image"); goto out; @@ -1999,7 +2270,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Want a backing file? There you go.*/ if (backing_file) { - ret = bdrv_change_backing_file(bs, backing_file, backing_format); + ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format); if (ret < 0) { error_setg_errno(errp, -ret, "Could not assign backing file '%s' " "with format '%s'", backing_file, backing_format); @@ -2009,9 +2280,9 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* And if we're supposed to preallocate metadata, do that now */ if (prealloc != PREALLOC_MODE_OFF) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = blk_bs(blk)->opaque; qemu_co_mutex_lock(&s->lock); - ret = preallocate(bs); + ret = preallocate(blk_bs(blk)); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); @@ -2019,22 +2290,24 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ - ret = bdrv_open(&bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, - &bdrv_qcow2, &local_err); - if (local_err) { + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow2")); + blk = blk_new_open(filename, NULL, options, + BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } ret = 0; out: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } return ret; } @@ -2066,7 +2339,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) DEFAULT_CLUSTER_SIZE); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(PreallocMode_lookup, buf, - PREALLOC_MODE_MAX, PREALLOC_MODE_OFF, + PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -2142,7 +2415,7 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; /* Emulate misaligned zero writes */ if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { @@ -2162,7 +2435,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { int ret; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, @@ -2173,7 +2446,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, static int qcow2_truncate(BlockDriverState *bs, int64_t offset) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t new_l1_size; int ret; @@ -2202,7 +2475,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) /* write updated header.size */ offset = cpu_to_be64(offset); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size), &offset, sizeof(uint64_t)); if (ret < 0) { return ret; @@ -2217,7 +2490,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; z_stream strm; int ret, out_len; uint8_t *out_buf; @@ -2226,8 +2499,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, if (nb_sectors == 0) { /* align end of file to a sector boundary to ease reading with sector based I/Os */ - cluster_offset = bdrv_getlength(bs->file); - return bdrv_truncate(bs->file, cluster_offset); + cluster_offset = bdrv_getlength(bs->file->bs); + return bdrv_truncate(bs->file->bs, cluster_offset); } if (nb_sectors != s->cluster_sectors) { @@ -2294,7 +2567,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); - ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); + ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len); if (ret < 0) { goto fail; } @@ -2308,7 +2581,7 @@ fail: static int make_completely_empty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret, l1_clusters; int64_t offset; uint64_t *new_reftable = NULL; @@ -2343,7 +2616,7 @@ static int make_completely_empty(BlockDriverState *bs) /* After this call, neither the in-memory nor the on-disk refcount * information accurately describe the actual references */ - ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE, + ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE, l1_clusters * s->cluster_sectors, 0); if (ret < 0) { goto fail_broken_refcounts; @@ -2357,7 +2630,7 @@ static int make_completely_empty(BlockDriverState *bs) * overwrite parts of the existing refcount and L1 table, which is not * an issue because the dirty flag is set, complete data loss is in fact * desired and partial data loss is consequently fine as well */ - ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE, + ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE, (2 + l1_clusters) * s->cluster_size / BDRV_SECTOR_SIZE, 0); /* This call (even if it failed overall) may have overwritten on-disk @@ -2377,7 +2650,7 @@ static int make_completely_empty(BlockDriverState *bs) cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size); cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size); cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset), &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); if (ret < 0) { goto fail_broken_refcounts; @@ -2408,7 +2681,7 @@ static int make_completely_empty(BlockDriverState *bs) /* Enter the first refblock into the reftable */ rt_entry = cpu_to_be64(2 * s->cluster_size); - ret = bdrv_pwrite_sync(bs->file, s->cluster_size, + ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size, &rt_entry, sizeof(rt_entry)); if (ret < 0) { goto fail_broken_refcounts; @@ -2433,7 +2706,7 @@ static int make_completely_empty(BlockDriverState *bs) goto fail; } - ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size); + ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size); if (ret < 0) { goto fail; } @@ -2456,7 +2729,7 @@ fail: static int qcow2_make_empty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t start_sector; int sector_step = INT_MAX / BDRV_SECTOR_SIZE; int l1_clusters, ret = 0; @@ -2497,7 +2770,7 @@ static int qcow2_make_empty(BlockDriverState *bs) static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; qemu_co_mutex_lock(&s->lock); @@ -2521,7 +2794,7 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bdi->unallocated_blocks_are_zero = true; bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3); bdi->cluster_size = s->cluster_size; @@ -2531,22 +2804,20 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); *spec_info = (ImageInfoSpecific){ - .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2, - { - .qcow2 = g_new(ImageInfoSpecificQCow2, 1), - }, + .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, + .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1), }; if (s->qcow_version == 2) { - *spec_info->qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ .compat = g_strdup("0.10"), .refcount_bits = s->refcount_bits, }; } else if (s->qcow_version == 3) { - *spec_info->qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ .compat = g_strdup("1.1"), .lazy_refcounts = s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS, @@ -2556,6 +2827,10 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) .has_corrupt = true, .refcount_bits = s->refcount_bits, }; + } else { + /* if this assertion fails, this probably means a new version was + * added without having it covered here */ + assert(false); } return spec_info; @@ -2564,11 +2839,11 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) #if 0 static void dump_refcounts(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t nb_clusters, k, k1, size; int refcount; - size = bdrv_getlength(bs->file); + size = bdrv_getlength(bs->file->bs); nb_clusters = size_to_clusters(s, size); for(k = 0; k < nb_clusters;) { k1 = k; @@ -2585,7 +2860,7 @@ static void dump_refcounts(BlockDriverState *bs) static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t total_sectors = bs->total_sectors; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; @@ -2606,7 +2881,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; @@ -2623,9 +2898,9 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, * have to be removed. */ static int qcow2_downgrade(BlockDriverState *bs, int target_version, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, void *cb_opaque) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int current_version = s->qcow_version; int ret; @@ -2638,13 +2913,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, } if (s->refcount_order != 4) { - /* we would have to convert the image to a refcount_order == 4 image - * here; however, since qemu (at the time of writing this) does not - * support anything different than 4 anyway, there is no point in doing - * so right now; however, we should error out (if qemu supports this in - * the future and this code has not been adapted) */ - error_report("qcow2_downgrade: Image refcount orders other than 4 are " - "currently not supported."); + error_report("compat=0.10 requires refcount_bits=16"); return -ENOTSUP; } @@ -2672,7 +2941,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, /* clearing autoclear features is trivial */ s->autoclear_features = 0; - ret = qcow2_expand_zero_clusters(bs, status_cb); + ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); if (ret < 0) { return ret; } @@ -2686,10 +2955,81 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, return 0; } +typedef enum Qcow2AmendOperation { + /* This is the value Qcow2AmendHelperCBInfo::last_operation will be + * statically initialized to so that the helper CB can discern the first + * invocation from an operation change */ + QCOW2_NO_OPERATION = 0, + + QCOW2_CHANGING_REFCOUNT_ORDER, + QCOW2_DOWNGRADING, +} Qcow2AmendOperation; + +typedef struct Qcow2AmendHelperCBInfo { + /* The code coordinating the amend operations should only modify + * these four fields; the rest will be managed by the CB */ + BlockDriverAmendStatusCB *original_status_cb; + void *original_cb_opaque; + + Qcow2AmendOperation current_operation; + + /* Total number of operations to perform (only set once) */ + int total_operations; + + /* The following fields are managed by the CB */ + + /* Number of operations completed */ + int operations_completed; + + /* Cumulative offset of all completed operations */ + int64_t offset_completed; + + Qcow2AmendOperation last_operation; + int64_t last_work_size; +} Qcow2AmendHelperCBInfo; + +static void qcow2_amend_helper_cb(BlockDriverState *bs, + int64_t operation_offset, + int64_t operation_work_size, void *opaque) +{ + Qcow2AmendHelperCBInfo *info = opaque; + int64_t current_work_size; + int64_t projected_work_size; + + if (info->current_operation != info->last_operation) { + if (info->last_operation != QCOW2_NO_OPERATION) { + info->offset_completed += info->last_work_size; + info->operations_completed++; + } + + info->last_operation = info->current_operation; + } + + assert(info->total_operations > 0); + assert(info->operations_completed < info->total_operations); + + info->last_work_size = operation_work_size; + + current_work_size = info->offset_completed + operation_work_size; + + /* current_work_size is the total work size for (operations_completed + 1) + * operations (which includes this one), so multiply it by the number of + * operations not covered and divide it by the number of operations + * covered to get a projection for the operations not covered */ + projected_work_size = current_work_size * (info->total_operations - + info->operations_completed - 1) + / (info->operations_completed + 1); + + info->original_status_cb(bs, info->offset_completed + operation_offset, + current_work_size + projected_work_size, + info->original_cb_opaque); +} + static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; uint64_t new_size = 0; const char *backing_file = NULL, *backing_format = NULL; @@ -2697,8 +3037,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, const char *compat = NULL; uint64_t cluster_size = s->cluster_size; bool encrypt; + int refcount_bits = s->refcount_bits; int ret; QemuOptDesc *desc = opts->list->desc; + Qcow2AmendHelperCBInfo helper_cb_info; while (desc && desc->name) { if (!qemu_opt_find(opts, desc->name)) { @@ -2716,11 +3058,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } else if (!strcmp(compat, "1.1")) { new_version = 3; } else { - fprintf(stderr, "Unknown compatibility level %s.\n", compat); + error_report("Unknown compatibility level %s", compat); return -EINVAL; } } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { - fprintf(stderr, "Cannot change preallocation mode.\n"); + error_report("Cannot change preallocation mode"); return -ENOTSUP; } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); @@ -2733,47 +3075,74 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, !!s->cipher); if (encrypt != !!s->cipher) { - fprintf(stderr, "Changing the encryption flag is not " - "supported.\n"); + error_report("Changing the encryption flag is not supported"); return -ENOTSUP; } } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); if (cluster_size != s->cluster_size) { - fprintf(stderr, "Changing the cluster size is not " - "supported.\n"); + error_report("Changing the cluster size is not supported"); return -ENOTSUP; } } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, lazy_refcounts); } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { - error_report("Cannot change refcount entry width"); - return -ENOTSUP; + refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, + refcount_bits); + + if (refcount_bits <= 0 || refcount_bits > 64 || + !is_power_of_2(refcount_bits)) + { + error_report("Refcount width must be a power of two and may " + "not exceed 64 bits"); + return -EINVAL; + } } else { - /* if this assertion fails, this probably means a new option was + /* if this point is reached, this probably means a new option was * added without having it covered here */ - assert(false); + abort(); } desc++; } - if (new_version != old_version) { - if (new_version > old_version) { - /* Upgrade */ - s->qcow_version = new_version; - ret = qcow2_update_header(bs); - if (ret < 0) { - s->qcow_version = old_version; - return ret; - } - } else { - ret = qcow2_downgrade(bs, new_version, status_cb); - if (ret < 0) { - return ret; - } + helper_cb_info = (Qcow2AmendHelperCBInfo){ + .original_status_cb = status_cb, + .original_cb_opaque = cb_opaque, + .total_operations = (new_version < old_version) + + (s->refcount_bits != refcount_bits) + }; + + /* Upgrade first (some features may require compat=1.1) */ + if (new_version > old_version) { + s->qcow_version = new_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = old_version; + return ret; + } + } + + if (s->refcount_bits != refcount_bits) { + int refcount_order = ctz32(refcount_bits); + Error *local_error = NULL; + + if (new_version < 3 && refcount_bits != 16) { + error_report("Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + return -EINVAL; + } + + helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; + ret = qcow2_change_refcount_order(bs, refcount_order, + &qcow2_amend_helper_cb, + &helper_cb_info, &local_error); + if (ret < 0) { + error_report_err(local_error); + return ret; } } @@ -2788,9 +3157,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (s->use_lazy_refcounts != lazy_refcounts) { if (lazy_refcounts) { - if (s->qcow_version < 3) { - fprintf(stderr, "Lazy refcounts only supported with compatibility " - "level 1.1 and above (use compat=1.1 or greater)\n"); + if (new_version < 3) { + error_report("Lazy refcounts only supported with compatibility " + "level 1.1 and above (use compat=1.1 or greater)"); return -EINVAL; } s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; @@ -2824,6 +3193,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } } + /* Downgrade last (so unsupported features can be removed before) */ + if (new_version < old_version) { + helper_cb_info.current_operation = QCOW2_DOWNGRADING; + ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, + &helper_cb_info); + if (ret < 0) { + return ret; + } + } + return 0; } @@ -2836,7 +3215,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, int64_t size, const char *message_format, ...) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; const char *node_name; char *message; va_list ap; @@ -2937,11 +3316,14 @@ static QemuOptsList qcow2_create_opts = { BlockDriver bdrv_qcow2 = { .format_name = "qcow2", - .instance_size = sizeof(BDRVQcowState), + .instance_size = sizeof(BDRVQcow2State), .bdrv_probe = qcow2_probe, .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_reopen_commit = qcow2_reopen_commit, + .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, @@ -2973,10 +3355,14 @@ BlockDriver bdrv_qcow2 = { .bdrv_refresh_limits = qcow2_refresh_limits, .bdrv_invalidate_cache = qcow2_invalidate_cache, + .bdrv_inactivate = qcow2_inactivate, .create_opts = &qcow2_create_opts, .bdrv_check = qcow2_check, .bdrv_amend_options = qcow2_amend_options, + + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, }; static void bdrv_qcow2_init(void)