Add qemu 2.4.0
[kvmfornfv.git] / qemu / block / qcow2-cache.c
diff --git a/qemu/block/qcow2-cache.c b/qemu/block/qcow2-cache.c
new file mode 100644 (file)
index 0000000..53b8afc
--- /dev/null
@@ -0,0 +1,350 @@
+/*
+ * L2/refcount table cache for the QCOW2 format
+ *
+ * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/block_int.h"
+#include "qemu-common.h"
+#include "qcow2.h"
+#include "trace.h"
+
+typedef struct Qcow2CachedTable {
+    int64_t  offset;
+    bool     dirty;
+    uint64_t lru_counter;
+    int      ref;
+} Qcow2CachedTable;
+
+struct Qcow2Cache {
+    Qcow2CachedTable       *entries;
+    struct Qcow2Cache      *depends;
+    int                     size;
+    bool                    depends_on_flush;
+    void                   *table_array;
+    uint64_t                lru_counter;
+};
+
+static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
+                    Qcow2Cache *c, int table)
+{
+    BDRVQcowState *s = bs->opaque;
+    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+}
+
+static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
+                  Qcow2Cache *c, void *table)
+{
+    BDRVQcowState *s = bs->opaque;
+    ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
+    int idx = table_offset / s->cluster_size;
+    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
+    return idx;
+}
+
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+{
+    BDRVQcowState *s = bs->opaque;
+    Qcow2Cache *c;
+
+    c = g_new0(Qcow2Cache, 1);
+    c->size = num_tables;
+    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
+    c->table_array = qemu_try_blockalign(bs->file,
+                                         (size_t) num_tables * s->cluster_size);
+
+    if (!c->entries || !c->table_array) {
+        qemu_vfree(c->table_array);
+        g_free(c->entries);
+        g_free(c);
+        c = NULL;
+    }
+
+    return c;
+}
+
+int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int i;
+
+    for (i = 0; i < c->size; i++) {
+        assert(c->entries[i].ref == 0);
+    }
+
+    qemu_vfree(c->table_array);
+    g_free(c->entries);
+    g_free(c);
+
+    return 0;
+}
+
+static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int ret;
+
+    ret = qcow2_cache_flush(bs, c->depends);
+    if (ret < 0) {
+        return ret;
+    }
+
+    c->depends = NULL;
+    c->depends_on_flush = false;
+
+    return 0;
+}
+
+static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret = 0;
+
+    if (!c->entries[i].dirty || !c->entries[i].offset) {
+        return 0;
+    }
+
+    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+                                  c == s->l2_table_cache, i);
+
+    if (c->depends) {
+        ret = qcow2_cache_flush_dependency(bs, c);
+    } else if (c->depends_on_flush) {
+        ret = bdrv_flush(bs->file);
+        if (ret >= 0) {
+            c->depends_on_flush = false;
+        }
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (c == s->refcount_block_cache) {
+        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
+                c->entries[i].offset, s->cluster_size);
+    } else if (c == s->l2_table_cache) {
+        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
+                c->entries[i].offset, s->cluster_size);
+    } else {
+        ret = qcow2_pre_write_overlap_check(bs, 0,
+                c->entries[i].offset, s->cluster_size);
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (c == s->refcount_block_cache) {
+        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+    } else if (c == s->l2_table_cache) {
+        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+    }
+
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
+    if (ret < 0) {
+        return ret;
+    }
+
+    c->entries[i].dirty = false;
+
+    return 0;
+}
+
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+    BDRVQcowState *s = bs->opaque;
+    int result = 0;
+    int ret;
+    int i;
+
+    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
+    for (i = 0; i < c->size; i++) {
+        ret = qcow2_cache_entry_flush(bs, c, i);
+        if (ret < 0 && result != -ENOSPC) {
+            result = ret;
+        }
+    }
+
+    if (result == 0) {
+        ret = bdrv_flush(bs->file);
+        if (ret < 0) {
+            result = ret;
+        }
+    }
+
+    return result;
+}
+
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+    Qcow2Cache *dependency)
+{
+    int ret;
+
+    if (dependency->depends) {
+        ret = qcow2_cache_flush_dependency(bs, dependency);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    if (c->depends && (c->depends != dependency)) {
+        ret = qcow2_cache_flush_dependency(bs, c);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    c->depends = dependency;
+    return 0;
+}
+
+void qcow2_cache_depends_on_flush(Qcow2Cache *c)
+{
+    c->depends_on_flush = true;
+}
+
+int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int ret, i;
+
+    ret = qcow2_cache_flush(bs, c);
+    if (ret < 0) {
+        return ret;
+    }
+
+    for (i = 0; i < c->size; i++) {
+        assert(c->entries[i].ref == 0);
+        c->entries[i].offset = 0;
+        c->entries[i].lru_counter = 0;
+    }
+
+    c->lru_counter = 0;
+
+    return 0;
+}
+
+static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
+    uint64_t offset, void **table, bool read_from_disk)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i;
+    int ret;
+    int lookup_index;
+    uint64_t min_lru_counter = UINT64_MAX;
+    int min_lru_index = -1;
+
+    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+                          offset, read_from_disk);
+
+    /* Check if the table is already cached */
+    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
+    do {
+        const Qcow2CachedTable *t = &c->entries[i];
+        if (t->offset == offset) {
+            goto found;
+        }
+        if (t->ref == 0 && t->lru_counter < min_lru_counter) {
+            min_lru_counter = t->lru_counter;
+            min_lru_index = i;
+        }
+        if (++i == c->size) {
+            i = 0;
+        }
+    } while (i != lookup_index);
+
+    if (min_lru_index == -1) {
+        /* This can't happen in current synchronous code, but leave the check
+         * here as a reminder for whoever starts using AIO with the cache */
+        abort();
+    }
+
+    /* Cache miss: write a table back and replace it */
+    i = min_lru_index;
+    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+                                        c == s->l2_table_cache, i);
+
+    ret = qcow2_cache_entry_flush(bs, c, i);
+    if (ret < 0) {
+        return ret;
+    }
+
+    trace_qcow2_cache_get_read(qemu_coroutine_self(),
+                               c == s->l2_table_cache, i);
+    c->entries[i].offset = 0;
+    if (read_from_disk) {
+        if (c == s->l2_table_cache) {
+            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+        }
+
+        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+                         s->cluster_size);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    c->entries[i].offset = offset;
+
+    /* And return the right table */
+found:
+    c->entries[i].ref++;
+    *table = qcow2_cache_get_table_addr(bs, c, i);
+
+    trace_qcow2_cache_get_done(qemu_coroutine_self(),
+                               c == s->l2_table_cache, i);
+
+    return 0;
+}
+
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+    void **table)
+{
+    return qcow2_cache_do_get(bs, c, offset, table, true);
+}
+
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+    void **table)
+{
+    return qcow2_cache_do_get(bs, c, offset, table, false);
+}
+
+void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+{
+    int i = qcow2_cache_get_table_idx(bs, c, *table);
+
+    c->entries[i].ref--;
+    *table = NULL;
+
+    if (c->entries[i].ref == 0) {
+        c->entries[i].lru_counter = ++c->lru_counter;
+    }
+
+    assert(c->entries[i].ref >= 0);
+}
+
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+     void *table)
+{
+    int i = qcow2_cache_get_table_idx(bs, c, table);
+    assert(c->entries[i].offset != 0);
+    c->entries[i].dirty = true;
+}