These changes are the raw update to qemu-2.6.
[kvmfornfv.git] / qemu / block / qcow2-cache.c
index 53b8afc..0fe8eda 100644 (file)
  * THE SOFTWARE.
  */
 
+/* Needed for CONFIG_MADVISE */
+#include "qemu/osdep.h"
+
+#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
+#include <sys/mman.h>
+#endif
+
 #include "block/block_int.h"
 #include "qemu-common.h"
 #include "qcow2.h"
@@ -29,9 +36,9 @@
 
 typedef struct Qcow2CachedTable {
     int64_t  offset;
-    bool     dirty;
     uint64_t lru_counter;
     int      ref;
+    bool     dirty;
 } Qcow2CachedTable;
 
 struct Qcow2Cache {
@@ -41,34 +48,85 @@ struct Qcow2Cache {
     bool                    depends_on_flush;
     void                   *table_array;
     uint64_t                lru_counter;
+    uint64_t                cache_clean_lru_counter;
 };
 
 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
                     Qcow2Cache *c, int table)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
 }
 
 static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
                   Qcow2Cache *c, void *table)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
     int idx = table_offset / s->cluster_size;
     assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
     return idx;
 }
 
+static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
+                                      int i, int num_tables)
+{
+#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
+    BDRVQcow2State *s = bs->opaque;
+    void *t = qcow2_cache_get_table_addr(bs, c, i);
+    int align = getpagesize();
+    size_t mem_size = (size_t) s->cluster_size * num_tables;
+    size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
+    size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
+    if (length > 0) {
+        qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
+    }
+#endif
+}
+
+static inline bool can_clean_entry(Qcow2Cache *c, int i)
+{
+    Qcow2CachedTable *t = &c->entries[i];
+    return t->ref == 0 && !t->dirty && t->offset != 0 &&
+        t->lru_counter <= c->cache_clean_lru_counter;
+}
+
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int i = 0;
+    while (i < c->size) {
+        int to_clean = 0;
+
+        /* Skip the entries that we don't need to clean */
+        while (i < c->size && !can_clean_entry(c, i)) {
+            i++;
+        }
+
+        /* And count how many we can clean in a row */
+        while (i < c->size && can_clean_entry(c, i)) {
+            c->entries[i].offset = 0;
+            c->entries[i].lru_counter = 0;
+            i++;
+            to_clean++;
+        }
+
+        if (to_clean > 0) {
+            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+        }
+    }
+
+    c->cache_clean_lru_counter = c->lru_counter;
+}
+
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     Qcow2Cache *c;
 
     c = g_new0(Qcow2Cache, 1);
     c->size = num_tables;
     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    c->table_array = qemu_try_blockalign(bs->file,
+    c->table_array = qemu_try_blockalign(bs->file->bs,
                                          (size_t) num_tables * s->cluster_size);
 
     if (!c->entries || !c->table_array) {
@@ -113,7 +171,7 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
 
 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int ret = 0;
 
     if (!c->entries[i].dirty || !c->entries[i].offset) {
@@ -126,7 +184,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
     if (c->depends) {
         ret = qcow2_cache_flush_dependency(bs, c);
     } else if (c->depends_on_flush) {
-        ret = bdrv_flush(bs->file);
+        ret = bdrv_flush(bs->file->bs);
         if (ret >= 0) {
             c->depends_on_flush = false;
         }
@@ -157,7 +215,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
     }
 
-    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+    ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
                       qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
     if (ret < 0) {
         return ret;
@@ -170,7 +228,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int result = 0;
     int ret;
     int i;
@@ -185,7 +243,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
     }
 
     if (result == 0) {
-        ret = bdrv_flush(bs->file);
+        ret = bdrv_flush(bs->file->bs);
         if (ret < 0) {
             result = ret;
         }
@@ -237,6 +295,8 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
         c->entries[i].lru_counter = 0;
     }
 
+    qcow2_cache_table_release(bs, c, 0, c->size);
+
     c->lru_counter = 0;
 
     return 0;
@@ -245,7 +305,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     uint64_t offset, void **table, bool read_from_disk)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int i;
     int ret;
     int lookup_index;
@@ -295,7 +355,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
         }
 
-        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+        ret = bdrv_pread(bs->file->bs, offset,
+                         qcow2_cache_get_table_addr(bs, c, i),
                          s->cluster_size);
         if (ret < 0) {
             return ret;