These changes are the raw update to qemu-2.6.
[kvmfornfv.git] / qemu / backends / hostmem.c
1 /*
2  * QEMU Host Memory Backend
3  *
4  * Copyright (C) 2013-2014 Red Hat Inc
5  *
6  * Authors:
7  *   Igor Mammedov <imammedo@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "sysemu/hostmem.h"
14 #include "hw/boards.h"
15 #include "qapi/error.h"
16 #include "qapi/visitor.h"
17 #include "qapi-types.h"
18 #include "qapi-visit.h"
19 #include "qemu/config-file.h"
20 #include "qom/object_interfaces.h"
21
22 #ifdef CONFIG_NUMA
23 #include <numaif.h>
24 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
28 #endif
29
30 static void
31 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
32                              void *opaque, Error **errp)
33 {
34     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
35     uint64_t value = backend->size;
36
37     visit_type_size(v, name, &value, errp);
38 }
39
40 static void
41 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
42                              void *opaque, Error **errp)
43 {
44     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
45     Error *local_err = NULL;
46     uint64_t value;
47
48     if (memory_region_size(&backend->mr)) {
49         error_setg(&local_err, "cannot change property value");
50         goto out;
51     }
52
53     visit_type_size(v, name, &value, &local_err);
54     if (local_err) {
55         goto out;
56     }
57     if (!value) {
58         error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
59                    PRIu64 "'", object_get_typename(obj), name, value);
60         goto out;
61     }
62     backend->size = value;
63 out:
64     error_propagate(errp, local_err);
65 }
66
67 static void
68 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
69                                    void *opaque, Error **errp)
70 {
71     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
72     uint16List *host_nodes = NULL;
73     uint16List **node = &host_nodes;
74     unsigned long value;
75
76     value = find_first_bit(backend->host_nodes, MAX_NODES);
77     if (value == MAX_NODES) {
78         return;
79     }
80
81     *node = g_malloc0(sizeof(**node));
82     (*node)->value = value;
83     node = &(*node)->next;
84
85     do {
86         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
87         if (value == MAX_NODES) {
88             break;
89         }
90
91         *node = g_malloc0(sizeof(**node));
92         (*node)->value = value;
93         node = &(*node)->next;
94     } while (true);
95
96     visit_type_uint16List(v, name, &host_nodes, errp);
97 }
98
99 static void
100 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
101                                    void *opaque, Error **errp)
102 {
103 #ifdef CONFIG_NUMA
104     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
105     uint16List *l = NULL;
106
107     visit_type_uint16List(v, name, &l, errp);
108
109     while (l) {
110         bitmap_set(backend->host_nodes, l->value, 1);
111         l = l->next;
112     }
113 #else
114     error_setg(errp, "NUMA node binding are not supported by this QEMU");
115 #endif
116 }
117
118 static int
119 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
120 {
121     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
122     return backend->policy;
123 }
124
125 static void
126 host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
127 {
128     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
129     backend->policy = policy;
130
131 #ifndef CONFIG_NUMA
132     if (policy != HOST_MEM_POLICY_DEFAULT) {
133         error_setg(errp, "NUMA policies are not supported by this QEMU");
134     }
135 #endif
136 }
137
138 static bool host_memory_backend_get_merge(Object *obj, Error **errp)
139 {
140     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
141
142     return backend->merge;
143 }
144
145 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
146 {
147     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
148
149     if (!memory_region_size(&backend->mr)) {
150         backend->merge = value;
151         return;
152     }
153
154     if (value != backend->merge) {
155         void *ptr = memory_region_get_ram_ptr(&backend->mr);
156         uint64_t sz = memory_region_size(&backend->mr);
157
158         qemu_madvise(ptr, sz,
159                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
160         backend->merge = value;
161     }
162 }
163
164 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
165 {
166     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
167
168     return backend->dump;
169 }
170
171 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
172 {
173     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
174
175     if (!memory_region_size(&backend->mr)) {
176         backend->dump = value;
177         return;
178     }
179
180     if (value != backend->dump) {
181         void *ptr = memory_region_get_ram_ptr(&backend->mr);
182         uint64_t sz = memory_region_size(&backend->mr);
183
184         qemu_madvise(ptr, sz,
185                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
186         backend->dump = value;
187     }
188 }
189
190 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
191 {
192     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
193
194     return backend->prealloc || backend->force_prealloc;
195 }
196
197 static void host_memory_backend_set_prealloc(Object *obj, bool value,
198                                              Error **errp)
199 {
200     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
201
202     if (backend->force_prealloc) {
203         if (value) {
204             error_setg(errp,
205                        "remove -mem-prealloc to use the prealloc property");
206             return;
207         }
208     }
209
210     if (!memory_region_size(&backend->mr)) {
211         backend->prealloc = value;
212         return;
213     }
214
215     if (value && !backend->prealloc) {
216         int fd = memory_region_get_fd(&backend->mr);
217         void *ptr = memory_region_get_ram_ptr(&backend->mr);
218         uint64_t sz = memory_region_size(&backend->mr);
219
220         os_mem_prealloc(fd, ptr, sz);
221         backend->prealloc = true;
222     }
223 }
224
225 static void host_memory_backend_init(Object *obj)
226 {
227     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
228     MachineState *machine = MACHINE(qdev_get_machine());
229
230     backend->merge = machine_mem_merge(machine);
231     backend->dump = machine_dump_guest_core(machine);
232     backend->prealloc = mem_prealloc;
233
234     object_property_add_bool(obj, "merge",
235                         host_memory_backend_get_merge,
236                         host_memory_backend_set_merge, NULL);
237     object_property_add_bool(obj, "dump",
238                         host_memory_backend_get_dump,
239                         host_memory_backend_set_dump, NULL);
240     object_property_add_bool(obj, "prealloc",
241                         host_memory_backend_get_prealloc,
242                         host_memory_backend_set_prealloc, NULL);
243     object_property_add(obj, "size", "int",
244                         host_memory_backend_get_size,
245                         host_memory_backend_set_size, NULL, NULL, NULL);
246     object_property_add(obj, "host-nodes", "int",
247                         host_memory_backend_get_host_nodes,
248                         host_memory_backend_set_host_nodes, NULL, NULL, NULL);
249     object_property_add_enum(obj, "policy", "HostMemPolicy",
250                              HostMemPolicy_lookup,
251                              host_memory_backend_get_policy,
252                              host_memory_backend_set_policy, NULL);
253 }
254
255 MemoryRegion *
256 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
257 {
258     return memory_region_size(&backend->mr) ? &backend->mr : NULL;
259 }
260
261 static void
262 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
263 {
264     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
265     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
266     Error *local_err = NULL;
267     void *ptr;
268     uint64_t sz;
269
270     if (bc->alloc) {
271         bc->alloc(backend, &local_err);
272         if (local_err) {
273             error_propagate(errp, local_err);
274             return;
275         }
276
277         ptr = memory_region_get_ram_ptr(&backend->mr);
278         sz = memory_region_size(&backend->mr);
279
280         if (backend->merge) {
281             qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
282         }
283         if (!backend->dump) {
284             qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
285         }
286 #ifdef CONFIG_NUMA
287         unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
288         /* lastbit == MAX_NODES means maxnode = 0 */
289         unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
290         /* ensure policy won't be ignored in case memory is preallocated
291          * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
292          * this doesn't catch hugepage case. */
293         unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
294
295         /* check for invalid host-nodes and policies and give more verbose
296          * error messages than mbind(). */
297         if (maxnode && backend->policy == MPOL_DEFAULT) {
298             error_setg(errp, "host-nodes must be empty for policy default,"
299                        " or you should explicitly specify a policy other"
300                        " than default");
301             return;
302         } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
303             error_setg(errp, "host-nodes must be set for policy %s",
304                        HostMemPolicy_lookup[backend->policy]);
305             return;
306         }
307
308         /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
309          * as argument to mbind() due to an old Linux bug (feature?) which
310          * cuts off the last specified node. This means backend->host_nodes
311          * must have MAX_NODES+1 bits available.
312          */
313         assert(sizeof(backend->host_nodes) >=
314                BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
315         assert(maxnode <= MAX_NODES);
316         if (mbind(ptr, sz, backend->policy,
317                   maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
318             if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
319                 error_setg_errno(errp, errno,
320                                  "cannot bind memory to host NUMA nodes");
321                 return;
322             }
323         }
324 #endif
325         /* Preallocate memory after the NUMA policy has been instantiated.
326          * This is necessary to guarantee memory is allocated with
327          * specified NUMA policy in place.
328          */
329         if (backend->prealloc) {
330             os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
331         }
332     }
333 }
334
335 static bool
336 host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
337 {
338     MemoryRegion *mr;
339
340     mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp);
341     if (memory_region_is_mapped(mr)) {
342         return false;
343     } else {
344         return true;
345     }
346 }
347
348 static void
349 host_memory_backend_class_init(ObjectClass *oc, void *data)
350 {
351     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
352
353     ucc->complete = host_memory_backend_memory_complete;
354     ucc->can_be_deleted = host_memory_backend_can_be_deleted;
355 }
356
357 static const TypeInfo host_memory_backend_info = {
358     .name = TYPE_MEMORY_BACKEND,
359     .parent = TYPE_OBJECT,
360     .abstract = true,
361     .class_size = sizeof(HostMemoryBackendClass),
362     .class_init = host_memory_backend_class_init,
363     .instance_size = sizeof(HostMemoryBackend),
364     .instance_init = host_memory_backend_init,
365     .interfaces = (InterfaceInfo[]) {
366         { TYPE_USER_CREATABLE },
367         { }
368     }
369 };
370
371 static void register_types(void)
372 {
373     type_register_static(&host_memory_backend_info);
374 }
375
376 type_init(register_types);