2 * Block driver for the QCOW version 2 format
4 * Copyright (c) 2004-2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu-common.h"
26 #include "block/block_int.h"
27 #include "block/qcow2.h"
28 #include "qemu/error-report.h"
/*
 * Release the strings owned by the in-memory snapshot list: for each of the
 * s->nb_snapshots entries, free its name and its ID string.
 *
 * NOTE(review): the end of this function is not visible in this excerpt, so
 * whether the list itself is released here cannot be confirmed from it.
 */
30 void qcow2_free_snapshots(BlockDriverState *bs)
32 BDRVQcowState *s = bs->opaque;
35 for(i = 0; i < s->nb_snapshots; i++) {
36 g_free(s->snapshots[i].name);
37 g_free(s->snapshots[i].id_str);
/*
 * Load the on-disk snapshot table into the in-memory list s->snapshots.
 *
 * Starting at s->snapshots_offset, each of the s->nb_snapshots entries is
 * read as: a fixed-size header (aligned to 8 bytes), extra_data_size bytes
 * of extra data, the ID string and the name. Multi-byte header fields are
 * converted from big endian. The number of bytes covered by the table is
 * stored in s->snapshots_size.
 *
 * NOTE(review): the declarations, the checks on the bdrv_pread() results and
 * the return statements are not visible in this excerpt; the
 * qcow2_free_snapshots() call at the end is presumably the failure path.
 */
44 int qcow2_read_snapshots(BlockDriverState *bs)
46 BDRVQcowState *s = bs->opaque;
48 QCowSnapshotExtraData extra;
50 int i, id_str_size, name_size;
52 uint32_t extra_data_size;
/* Without snapshots there is no table to read and it occupies no space */
55 if (!s->nb_snapshots) {
57 s->snapshots_size = 0;
61 offset = s->snapshots_offset;
/* Zero-initialised array with one element per snapshot */
62 s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
64 for(i = 0; i < s->nb_snapshots; i++) {
65 /* Read statically sized part of the snapshot header */
66 offset = align_offset(offset, 8);
67 ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
/* Convert the big-endian header fields to host byte order */
73 sn = s->snapshots + i;
74 sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
75 sn->l1_size = be32_to_cpu(h.l1_size);
76 sn->vm_state_size = be32_to_cpu(h.vm_state_size);
77 sn->date_sec = be32_to_cpu(h.date_sec);
78 sn->date_nsec = be32_to_cpu(h.date_nsec);
79 sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
80 extra_data_size = be32_to_cpu(h.extra_data_size);
82 id_str_size = be16_to_cpu(h.id_str_size);
83 name_size = be16_to_cpu(h.name_size);
/*
 * Read at most sizeof(extra) bytes of the extra data, but advance the
 * offset by the full on-disk extra_data_size so that any additional bytes
 * are skipped.
 */
86 ret = bdrv_pread(bs->file, offset, &extra,
87 MIN(sizeof(extra), extra_data_size));
91 offset += extra_data_size;
/* A 64-bit VM state size in the extra data replaces the 32-bit header value */
93 if (extra_data_size >= 8) {
94 sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
/*
 * The disk size is taken from the extra data when it is at least 16 bytes
 * long; the assignment from bs->total_sectors is presumably the else branch
 * (the branch keyword is not visible in this excerpt).
 */
97 if (extra_data_size >= 16) {
98 sn->disk_size = be64_to_cpu(extra.disk_size);
100 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
103 /* Read snapshot ID */
/* One extra byte is allocated for the NUL terminator stored below */
104 sn->id_str = g_malloc(id_str_size + 1);
105 ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
109 offset += id_str_size;
110 sn->id_str[id_str_size] = '\0';
112 /* Read snapshot name */
113 sn->name = g_malloc(name_size + 1);
114 ret = bdrv_pread(bs->file, offset, sn->name, name_size);
119 sn->name[name_size] = '\0';
/* Bound the number of table bytes consumed so far */
121 if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
/* The table size must not exceed INT_MAX before it is stored */
127 assert(offset - s->snapshots_offset <= INT_MAX);
128 s->snapshots_size = offset - s->snapshots_offset;
/* NOTE(review): presumably only reached on failure - confirm against the full source */
132 qcow2_free_snapshots(bs);
/*
 * Write the in-memory snapshot list s->snapshots as a new on-disk table.
 *
 * Steps visible here: compute the table size, allocate clusters for it,
 * write every entry (header, extra data, ID string, name), flush, update the
 * nb_snapshots and snapshots_offset header fields with one synced write, and
 * finally free the clusters of the old table and record the new offset and
 * size in s.
 *
 * NOTE(review): the checks on the individual return values and the return
 * statements are not visible in this excerpt.
 */
136 /* add at the end of the file a new list of snapshots */
137 static int qcow2_write_snapshots(BlockDriverState *bs)
139 BDRVQcowState *s = bs->opaque;
141 QCowSnapshotHeader h;
142 QCowSnapshotExtraData extra;
143 int i, name_size, id_str_size, snapshots_size;
/*
 * Packed pair holding the two header fields that are rewritten together at
 * the end of this function.
 */
145 uint32_t nb_snapshots;
146 uint64_t snapshots_offset;
147 } QEMU_PACKED header_data;
148 int64_t offset, snapshots_offset = 0;
151 /* compute the size of the snapshots */
153 for(i = 0; i < s->nb_snapshots; i++) {
154 sn = s->snapshots + i;
155 offset = align_offset(offset, 8);
157 offset += sizeof(extra);
158 offset += strlen(sn->id_str);
159 offset += strlen(sn->name);
/* Bound the accumulated table size */
161 if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
/* snapshots_size is an int, so the total must not exceed INT_MAX */
167 assert(offset <= INT_MAX);
168 snapshots_size = offset;
170 /* Allocate space for the new snapshot list */
171 snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
172 offset = snapshots_offset;
177 ret = bdrv_flush(bs);
182 /* The snapshot list position has not yet been updated, so these clusters
183 * must indeed be completely free */
184 ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size);
190 /* Write all snapshots to the new list */
191 for(i = 0; i < s->nb_snapshots; i++) {
192 sn = s->snapshots + i;
/* Start from a zeroed header and fill it with big-endian values */
193 memset(&h, 0, sizeof(h));
194 h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
195 h.l1_size = cpu_to_be32(sn->l1_size);
196 /* If it doesn't fit in 32 bit, older implementations should treat it
197 * as a disk-only snapshot rather than truncate the VM state */
198 if (sn->vm_state_size <= 0xffffffff) {
199 h.vm_state_size = cpu_to_be32(sn->vm_state_size);
201 h.date_sec = cpu_to_be32(sn->date_sec);
202 h.date_nsec = cpu_to_be32(sn->date_nsec);
203 h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
/* The complete QCowSnapshotExtraData structure is always written */
204 h.extra_data_size = cpu_to_be32(sizeof(extra));
206 memset(&extra, 0, sizeof(extra));
207 extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
208 extra.disk_size = cpu_to_be64(sn->disk_size);
/* Both string lengths are stored as 16-bit big-endian fields */
210 id_str_size = strlen(sn->id_str);
211 name_size = strlen(sn->name);
212 assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
213 h.id_str_size = cpu_to_be16(id_str_size);
214 h.name_size = cpu_to_be16(name_size);
/* Same 8-byte alignment per entry as in the size computation above */
215 offset = align_offset(offset, 8);
217 ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
223 ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
227 offset += sizeof(extra);
229 ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
233 offset += id_str_size;
235 ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
243 * Update the header to point to the new snapshot table. This requires the
244 * new table and its refcounts to be stable on disk.
246 ret = bdrv_flush(bs);
/*
 * Build-time check that snapshots_offset directly follows nb_snapshots in
 * QCowHeader, which is what allows header_data to be written at the offset
 * of nb_snapshots in a single call.
 */
251 QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
252 offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
254 header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
255 header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
257 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
258 &header_data, sizeof(header_data));
263 /* free the old snapshot table */
264 qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
265 QCOW2_DISCARD_SNAPSHOT);
266 s->snapshots_offset = snapshots_offset;
267 s->snapshots_size = snapshots_size;
/*
 * NOTE(review): presumably the error path - releases the clusters of the new
 * table if they were allocated (snapshots_offset is initialised to 0).
 */
271 if (snapshots_offset > 0) {
272 qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
273 QCOW2_DISCARD_ALWAYS);
/*
 * Generate a numeric ID for a new snapshot and format it into id_str.
 *
 * Each existing snapshot ID is parsed as a base-10 unsigned long, and the
 * result written to id_str (at most id_str_size bytes, via snprintf) is
 * id_max + 1.
 *
 * NOTE(review): the statement that updates id_max is not visible in this
 * excerpt; presumably it tracks the largest parsed ID.
 */
278 static void find_new_snapshot_id(BlockDriverState *bs,
279 char *id_str, int id_str_size)
281 BDRVQcowState *s = bs->opaque;
284 unsigned long id, id_max = 0;
286 for(i = 0; i < s->nb_snapshots; i++) {
287 sn = s->snapshots + i;
/* No end pointer is passed, so trailing non-digit characters are not detected */
288 id = strtoul(sn->id_str, NULL, 10);
292 snprintf(id_str, id_str_size, "%lu", id_max + 1);
/*
 * Look up a snapshot in s->snapshots by ID and/or name.
 *
 * Three searches are visible: one that requires both the ID and the name to
 * match, one that matches the ID only, and one that matches the name only.
 * Callers in this file use the result as an index into s->snapshots and
 * treat a negative value as "not found".
 *
 * NOTE(review): the conditions selecting which search runs are not visible
 * in this excerpt; callers pass NULL for either id or name, so presumably a
 * NULL argument means that criterion is ignored.
 */
295 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
299 BDRVQcowState *s = bs->opaque;
/* Match on both ID and name */
303 for (i = 0; i < s->nb_snapshots; i++) {
304 if (!strcmp(s->snapshots[i].id_str, id) &&
305 !strcmp(s->snapshots[i].name, name)) {
/* Match on ID only */
310 for (i = 0; i < s->nb_snapshots; i++) {
311 if (!strcmp(s->snapshots[i].id_str, id)) {
/* Match on name only */
316 for (i = 0; i < s->nb_snapshots; i++) {
317 if (!strcmp(s->snapshots[i].name, name)) {
/*
 * Look up a snapshot by a single string that may be either its ID or its
 * name: the string is first tried as an ID (name NULL), then as a name
 * (id NULL).
 *
 * NOTE(review): the check between the two lookups is not visible in this
 * excerpt; presumably the first result is returned when it found a match.
 */
326 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
327 const char *id_or_name)
331 ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
335 return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
/*
 * Create a new snapshot described by sn_info.
 *
 * Steps visible here: refuse when QCOW_MAX_SNAPSHOTS is reached, require a
 * unique ID, copy the metadata from sn_info, write a big-endian copy of the
 * active L1 table into newly allocated clusters, increase the refcounts of
 * the clusters referenced by the active L1 table, append the snapshot to the
 * in-memory list and write the snapshot table, then discard the VM state
 * clusters from the active image.
 *
 * NOTE(review): error checks, cleanup and return statements are not visible
 * in this excerpt.
 */
338 /* if no id is provided, a new one is constructed */
339 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
341 BDRVQcowState *s = bs->opaque;
342 QCowSnapshot *new_snapshot_list = NULL;
343 QCowSnapshot *old_snapshot_list = NULL;
344 QCowSnapshot sn1, *sn = &sn1;
346 uint64_t *l1_table = NULL;
347 int64_t l1_table_offset;
/* Enforce the limit on the number of snapshots */
349 if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
353 memset(sn, 0, sizeof(*sn));
/* The generated ID is written straight into sn_info->id_str */
356 find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
358 /* Check that the ID is unique */
359 if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
363 /* Populate sn with passed data */
364 sn->id_str = g_strdup(sn_info->id_str);
365 sn->name = g_strdup(sn_info->name);
/* The recorded disk size is the current size of the image */
367 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
368 sn->vm_state_size = sn_info->vm_state_size;
369 sn->date_sec = sn_info->date_sec;
370 sn->date_nsec = sn_info->date_nsec;
371 sn->vm_clock_nsec = sn_info->vm_clock_nsec;
373 /* Allocate the L1 table of the snapshot and copy the current one there. */
374 l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
375 if (l1_table_offset < 0) {
376 ret = l1_table_offset;
380 sn->l1_table_offset = l1_table_offset;
381 sn->l1_size = s->l1_size;
/*
 * Temporary big-endian copy of the in-memory L1 table. A NULL result from
 * g_try_new is only treated as a failure when l1_size is non-zero.
 */
383 l1_table = g_try_new(uint64_t, s->l1_size);
384 if (s->l1_size && l1_table == NULL) {
389 for(i = 0; i < s->l1_size; i++) {
390 l1_table[i] = cpu_to_be64(s->l1_table[i]);
393 ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
394 s->l1_size * sizeof(uint64_t));
399 ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
400 s->l1_size * sizeof(uint64_t));
409 * Increase the refcounts of all clusters and make sure everything is
410 * stable on disk before updating the snapshot table to contain a pointer
411 * to the new L1 table.
413 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
418 /* Append the new snapshot to the snapshot list */
419 new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
421 memcpy(new_snapshot_list, s->snapshots,
422 s->nb_snapshots * sizeof(QCowSnapshot));
/* Keep the old list so that it can be put back (see below) */
423 old_snapshot_list = s->snapshots;
425 s->snapshots = new_snapshot_list;
426 s->snapshots[s->nb_snapshots++] = *sn;
428 ret = qcow2_write_snapshots(bs);
/*
 * NOTE(review): the next two statements restore the previous list,
 * presumably only when writing the table failed.
 */
430 g_free(s->snapshots);
431 s->snapshots = old_snapshot_list;
/* The new list is in use; the old array is no longer needed */
436 g_free(old_snapshot_list);
438 /* The VM state isn't needed any more in the active L1 table; in fact, it
439 * hurts by causing expensive COW for the next snapshot. */
440 qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
441 align_offset(sn->vm_state_size, s->cluster_size)
443 QCOW2_DISCARD_NEVER, false);
/*
 * Refcount consistency check. NOTE(review): presumably compiled only under
 * a debug guard that is not visible in this excerpt.
 */
447 BdrvCheckResult result = {0};
448 qcow2_check_refcounts(bs, &result, 0);
/*
 * Revert the active image to a snapshot.
 *
 * The snapshot is looked up by ID or name. Its L1 table is read into a
 * buffer sized like the current L1 table, the refcounts of the clusters it
 * references are updated, the buffer is written over the active L1 table on
 * disk, the refcounts for the old active L1 table are updated, and the
 * in-memory L1 table is refreshed from the buffer.
 *
 * NOTE(review): error checks, cleanup and return statements are not visible
 * in this excerpt.
 */
461 /* copy the snapshot identified by 'snapshot_id' (an ID or a name) into the current disk image */
462 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
464 BDRVQcowState *s = bs->opaque;
466 int i, snapshot_index;
467 int cur_l1_bytes, sn_l1_bytes;
469 uint64_t *sn_l1_table = NULL;
471 /* Search the snapshot */
472 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
473 if (snapshot_index < 0) {
476 sn = &s->snapshots[snapshot_index];
/* Only snapshots whose recorded disk size equals the current image size are handled */
478 if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
479 error_report("qcow2: Loading snapshots with different disk "
480 "size is not implemented");
486 * Make sure that the current L1 table is big enough to contain the whole
487 * L1 table of the snapshot. If the snapshot L1 table is smaller, the
488 * current one must be padded with zeros.
490 ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
495 cur_l1_bytes = s->l1_size * sizeof(uint64_t);
496 sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
499 * Copy the snapshot L1 table to the current L1 table.
501 * Before overwriting the old current L1 table on disk, make sure to
502 * increase all refcounts for the clusters referenced by the new one.
503 * Decrease the refcount referenced by the old one only when the L1
504 * table is overwritten.
/*
 * The buffer has the size of the current L1 table and is zero-filled, so
 * entries beyond the sn_l1_bytes read from the snapshot stay zero.
 */
506 sn_l1_table = g_try_malloc0(cur_l1_bytes);
507 if (cur_l1_bytes && sn_l1_table == NULL) {
512 ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
517 ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
523 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
524 s->l1_table_offset, cur_l1_bytes);
529 ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
536 * Decrease refcount of clusters of current L1 table.
538 * At this point, the in-memory s->l1_table points to the old L1 table,
539 * whereas on disk we already have the new one.
541 * qcow2_update_snapshot_refcount special cases the current L1 table to use
542 * the in-memory data instead of really using the offset to load a new one,
543 * which is why this works.
545 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
549 * Now update the in-memory L1 table to be in sync with the on-disk one. We
550 * need to do this even if updating refcounts failed.
/* The buffer holds big-endian entries; convert them to host byte order */
552 for(i = 0;i < s->l1_size; i++) {
553 s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
564 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
565 * when we decreased the refcount of the old snapshot.
567 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
/*
 * Refcount consistency check. NOTE(review): presumably compiled only under
 * a debug guard that is not visible in this excerpt.
 */
574 BdrvCheckResult result = {0};
575 qcow2_check_refcounts(bs, &result, 0);
/*
 * Delete the snapshot selected by snapshot_id and/or name.
 *
 * Steps visible here: look up the snapshot (an error is set on errp when it
 * is not found), remove its entry from the in-memory list, write the
 * snapshot table, update the refcounts of the clusters referenced by the
 * snapshot L1 table, free the clusters of that L1 table, and finally update
 * the refcount-derived flags of the active L1 table. Failures of the
 * individual steps are reported through errp with the messages below.
 *
 * NOTE(review): the remaining parameters, the adjustment of
 * s->nb_snapshots, the error checks and the return statements are not
 * visible in this excerpt. The final refcount check is presumably compiled
 * only under a debug guard.
 */
585 int qcow2_snapshot_delete(BlockDriverState *bs,
586 const char *snapshot_id,
590 BDRVQcowState *s = bs->opaque;
592 int snapshot_index, ret;
594 /* Search the snapshot */
595 snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
596 if (snapshot_index < 0) {
597 error_setg(errp, "Can't find the snapshot");
/* Keep a copy of the entry: its slot in the list is overwritten below */
600 sn = s->snapshots[snapshot_index];
602 /* Remove it from the snapshot list */
/* sn is a copy of one list element, so sizeof(sn) is the element size */
603 memmove(s->snapshots + snapshot_index,
604 s->snapshots + snapshot_index + 1,
605 (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
607 ret = qcow2_write_snapshots(bs);
609 error_setg_errno(errp, -ret,
610 "Failed to remove snapshot from snapshot list");
615 * The snapshot is now unused, clean up. If we fail after this point, we
616 * won't recover but just leak clusters.
622 * Now decrease the refcounts of clusters referenced by the snapshot and
625 ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
628 error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
631 qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
632 QCOW2_DISCARD_SNAPSHOT);
634 /* must update the copied flag on the current cluster offsets */
635 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
637 error_setg_errno(errp, -ret,
638 "Failed to update snapshot status in disk");
644 BdrvCheckResult result = {0};
645 qcow2_check_refcounts(bs, &result, 0);
/*
 * Build an array of QEMUSnapshotInfo describing every snapshot of the image.
 *
 * One zero-initialised element is allocated per snapshot and filled with the
 * ID, the name, the VM state size, the date and the VM clock of the
 * corresponding in-memory snapshot. The visible return statements return
 * s->nb_snapshots (which is 0 in the early-exit case).
 *
 * NOTE(review): the store of the resulting array into *psn_tab is not
 * visible in this excerpt.
 */
651 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
653 BDRVQcowState *s = bs->opaque;
654 QEMUSnapshotInfo *sn_tab, *sn_info;
/* Nothing to list */
658 if (!s->nb_snapshots) {
660 return s->nb_snapshots;
663 sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
664 for(i = 0; i < s->nb_snapshots; i++) {
665 sn_info = sn_tab + i;
666 sn = s->snapshots + i;
/* String copies are bounded by the size of the destination buffers */
667 pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
669 pstrcpy(sn_info->name, sizeof(sn_info->name),
671 sn_info->vm_state_size = sn->vm_state_size;
672 sn_info->date_sec = sn->date_sec;
673 sn_info->date_nsec = sn->date_nsec;
674 sn_info->vm_clock_nsec = sn->vm_clock_nsec;
677 return s->nb_snapshots;
/*
 * Replace the in-memory L1 table with the L1 table of a snapshot.
 *
 * The image must be opened read-only (asserted). The snapshot is looked up
 * by snapshot_id and/or name, its L1 table is read from bs->file into a
 * newly allocated buffer, and s->l1_table, s->l1_size and s->l1_table_offset
 * are switched over to it. The visible code only reads from the image file.
 *
 * NOTE(review): the remaining parameters, some error checks and the return
 * statements are not visible in this excerpt.
 */
680 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
681 const char *snapshot_id,
685 int i, snapshot_index;
686 BDRVQcowState *s = bs->opaque;
688 uint64_t *new_l1_table;
692 assert(bs->read_only);
694 /* Search the snapshot */
695 snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
696 if (snapshot_index < 0) {
698 "Can't find snapshot");
701 sn = &s->snapshots[snapshot_index];
703 /* Allocate and read in the snapshot's L1 table */
/* Reject oversized L1 tables before the byte count is computed */
704 if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
705 error_setg(errp, "Snapshot L1 table too large");
708 new_l1_bytes = sn->l1_size * sizeof(uint64_t);
/* The allocation size is passed through align_offset(..., 512) */
709 new_l1_table = qemu_try_blockalign(bs->file,
710 align_offset(new_l1_bytes, 512));
711 if (new_l1_table == NULL) {
715 ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
717 error_setg(errp, "Failed to read l1 table for snapshot");
718 qemu_vfree(new_l1_table);
722 /* Switch the L1 table */
723 qemu_vfree(s->l1_table);
725 s->l1_size = sn->l1_size;
726 s->l1_table_offset = sn->l1_table_offset;
727 s->l1_table = new_l1_table;
/* Convert the entries read from disk from big endian, in place */
729 for(i = 0;i < s->l1_size; i++) {
730 be64_to_cpus(&s->l1_table[i]);