These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/arch/um/kernel/tlb.c
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

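/*
 * A host_vm_change collects pending mmap/munmap/mprotect operations
 * against the host address space backing an mm.  Adjacent compatible
 * operations are merged, and the queue is flushed through do_ops().
 */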
struct host_vm_change {
        struct host_vm_op {
                enum { NONE, MMAP, MUNMAP, MPROTECT } type;
                union {
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                                int fd;
                                __u64 offset;
                        } mmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                        } munmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                        } mprotect;
                } u;
        } ops[1];
        int index;
        struct mm_id *id;
        void *data;
        int force;
};

#define INIT_HVC(mm, force) \
        ((struct host_vm_change) \
         { .ops         = { { .type = NONE } }, \
           .id          = &mm->context.id, \
           .data        = NULL, \
           .index       = 0, \
           .force       = force })

static void report_enomem(void)
{
        printk(KERN_ERR "UML ran out of memory on the host side! "
                        "This can happen due to a memory limitation or "
                        "because vm.max_map_count has been reached.\n");
}

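/*
 * Issue the queued operations against the host process.  "finished"
 * tells the lower layers that no further operations follow in this
 * batch; -ENOMEM from the host is reported specially.
 */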
static int do_ops(struct host_vm_change *hvc, int end,
                  int finished)
{
        struct host_vm_op *op;
        int i, ret = 0;

        for (i = 0; i < end && !ret; i++) {
                op = &hvc->ops[i];
                switch (op->type) {
                case MMAP:
                        ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
                                  op->u.mmap.prot, op->u.mmap.fd,
                                  op->u.mmap.offset, finished, &hvc->data);
                        break;
                case MUNMAP:
                        ret = unmap(hvc->id, op->u.munmap.addr,
                                    op->u.munmap.len, finished, &hvc->data);
                        break;
                case MPROTECT:
                        ret = protect(hvc->id, op->u.mprotect.addr,
                                      op->u.mprotect.len, op->u.mprotect.prot,
                                      finished, &hvc->data);
                        break;
                default:
                        printk(KERN_ERR "Unknown op type %d in do_ops\n",
                               op->type);
                        BUG();
                        break;
                }
        }

        if (ret == -ENOMEM)
                report_enomem();

        return ret;
}

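/*
 * Queue a host mmap of the physical page backing "virt".  If it merely
 * extends the previous mmap entry (same fd and protection, contiguous
 * address and file offset), grow that entry instead of adding a new one.
 */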
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
                    unsigned int prot, struct host_vm_change *hvc)
{
        __u64 offset;
        struct host_vm_op *last;
        int fd, ret = 0;

        fd = phys_mapping(phys, &offset);
        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MMAP) &&
                   (last->u.mmap.addr + last->u.mmap.len == virt) &&
                   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
                   (last->u.mmap.offset + last->u.mmap.len == offset)) {
                        last->u.mmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MMAP,
                                    .u = { .mmap = { .addr      = virt,
                                                     .len       = len,
                                                     .prot      = prot,
                                                     .fd        = fd,
                                                     .offset    = offset }
                           } });
        return ret;
}

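/*
 * Queue a host munmap, merging with a directly preceding munmap entry
 * when the ranges are adjacent.  Addresses inside the stub area are
 * never unmapped and yield -EINVAL.
 */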
static int add_munmap(unsigned long addr, unsigned long len,
                      struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if ((addr >= STUB_START) && (addr < STUB_END))
                return -EINVAL;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MUNMAP) &&
                   (last->u.munmap.addr + last->u.munmap.len == addr)) {
                        last->u.munmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MUNMAP,
                                    .u = { .munmap = { .addr    = addr,
                                                       .len     = len } } });
        return ret;
}

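/*
 * Queue a host mprotect, again merging with the previous entry when the
 * ranges are adjacent and the protection matches.
 */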
static int add_mprotect(unsigned long addr, unsigned long len,
                        unsigned int prot, struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MPROTECT) &&
                   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
                   (last->u.mprotect.prot == prot)) {
                        last->u.mprotect.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MPROTECT,
                                    .u = { .mprotect = { .addr  = addr,
                                                         .len   = len,
                                                         .prot  = prot } } });
        return ret;
}

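/* Advance "n" to the next "inc"-aligned boundary; "inc" must be a power of two. */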
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

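/*
 * Walk the PTEs in [addr, end), skipping the stub area.  PTEs marked
 * new (or all of them, when force is set) are queued for mmap if
 * present and munmap if not; a pure protection change becomes an
 * mprotect.  Pages that are not young lose all host access and clean
 * pages lose write access, so the host faults needed to maintain the
 * accessed/dirty bits still occur.
 */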
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pte_t *pte;
        int r, w, x, prot, ret = 0;

        pte = pte_offset_kernel(pmd, addr);
        do {
                if ((addr >= STUB_START) && (addr < STUB_END))
                        continue;

                r = pte_read(*pte);
                w = pte_write(*pte);
                x = pte_exec(*pte);
                if (!pte_young(*pte)) {
                        r = 0;
                        w = 0;
                } else if (!pte_dirty(*pte))
                        w = 0;

                prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                        (x ? UM_PROT_EXEC : 0));
                if (hvc->force || pte_newpage(*pte)) {
                        if (pte_present(*pte))
                                ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                                               PAGE_SIZE, prot, hvc);
                        else
                                ret = add_munmap(addr, PAGE_SIZE, hvc);
                } else if (pte_newprot(*pte))
                        ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
                *pte = pte_mkuptodate(*pte);
        } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
        return ret;
}

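/*
 * The pmd and pud walkers below mirror update_pte_range(): a missing
 * entry that is marked new gets its whole range unmapped on the host,
 * otherwise the walk descends one level.
 */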
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pmd_t *pmd;
        unsigned long next;
        int ret = 0;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (!pmd_present(*pmd)) {
                        if (hvc->force || pmd_newpage(*pmd)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pmd_mkuptodate(*pmd);
                        }
                }
                else ret = update_pte_range(pmd, addr, next, hvc);
        } while (pmd++, addr = next, ((addr < end) && !ret));
        return ret;
}

static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pud_t *pud;
        unsigned long next;
        int ret = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (!pud_present(*pud)) {
                        if (hvc->force || pud_newpage(*pud)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pud_mkuptodate(*pud);
                        }
                }
                else ret = update_pmd_range(pud, addr, next, hvc);
        } while (pud++, addr = next, ((addr < end) && !ret));
        return ret;
}

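/*
 * Bring the host mappings for [start_addr, end_addr) back in sync with
 * the page tables of "mm".  With "force" set, every range is re-issued,
 * not just those marked new.  On failure the current process is killed,
 * since its host mappings can no longer be trusted.
 */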
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        pgd_t *pgd;
        struct host_vm_change hvc;
        unsigned long addr = start_addr, next;
        int ret = 0;

        hvc = INIT_HVC(mm, force);
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, end_addr);
                if (!pgd_present(*pgd)) {
                        if (force || pgd_newpage(*pgd)) {
                                ret = add_munmap(addr, next - addr, &hvc);
                                pgd_mkuptodate(*pgd);
                        }
                }
                else ret = update_pud_range(pgd, addr, next, &hvc);
        } while (pgd++, addr = next, ((addr < end_addr) && !ret));

        if (!ret)
                ret = do_ops(&hvc, hvc.index, 1);

        /* This is not an else because ret is modified above */
        if (ret) {
                printk(KERN_ERR "fix_range_common: failed, killing current "
                       "process: %d\n", task_tgid_vnr(current));
                /* We are under mmap_sem, release it such that current can terminate */
                up_write(&current->mm->mmap_sem);
                force_sig(SIGKILL, current);
                do_signal(&current->thread.regs);
        }
}

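/*
 * Same synchronisation for the kernel's own address space (init_mm),
 * but applied immediately via os_unmap_memory()/map_memory()/
 * os_protect_memory() instead of being queued; any failure is fatal.
 * Returns nonzero if anything was changed.
 */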
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
        struct mm_struct *mm;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, last;
        int updated = 0, err;

        mm = &init_mm;
        for (addr = start; addr < end;) {
                pgd = pgd_offset(mm, addr);
                if (!pgd_present(*pgd)) {
                        last = ADD_ROUND(addr, PGDIR_SIZE);
                        if (last > end)
                                last = end;
                        if (pgd_newpage(*pgd)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pud = pud_offset(pgd, addr);
                if (!pud_present(*pud)) {
                        last = ADD_ROUND(addr, PUD_SIZE);
                        if (last > end)
                                last = end;
                        if (pud_newpage(*pud)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pmd = pmd_offset(pud, addr);
                if (!pmd_present(*pmd)) {
                        last = ADD_ROUND(addr, PMD_SIZE);
                        if (last > end)
                                last = end;
                        if (pmd_newpage(*pmd)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pte = pte_offset_kernel(pmd, addr);
                if (!pte_present(*pte) || pte_newpage(*pte)) {
                        updated = 1;
                        err = os_unmap_memory((void *) addr,
                                              PAGE_SIZE);
                        if (err < 0)
                                panic("munmap failed, errno = %d\n",
                                      -err);
                        if (pte_present(*pte))
                                map_memory(addr,
                                           pte_val(*pte) & PAGE_MASK,
                                           PAGE_SIZE, 1, 1, 1);
                }
                else if (pte_newprot(*pte)) {
                        updated = 1;
                        os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
                }
                addr += PAGE_SIZE;
        }
        return updated;
}

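/*
 * Flush a single page of a user address space: walk the page tables by
 * hand and issue the matching map/unmap/protect call directly.  If the
 * walk or the host call fails, the process is killed, as in
 * fix_range_common().
 */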
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        struct mm_struct *mm = vma->vm_mm;
        void *flush = NULL;
        int r, w, x, prot, err = 0;
        struct mm_id *mm_id;

        address &= PAGE_MASK;
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                goto kill;

        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                goto kill;

        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                goto kill;

        pte = pte_offset_kernel(pmd, address);

        r = pte_read(*pte);
        w = pte_write(*pte);
        x = pte_exec(*pte);
        if (!pte_young(*pte)) {
                r = 0;
                w = 0;
        } else if (!pte_dirty(*pte)) {
                w = 0;
        }

        mm_id = &mm->context.id;
        prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                (x ? UM_PROT_EXEC : 0));
        if (pte_newpage(*pte)) {
                if (pte_present(*pte)) {
                        unsigned long long offset;
                        int fd;

                        fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
                        err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
                                  1, &flush);
                }
                else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
        }
        else if (pte_newprot(*pte))
                err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

        if (err) {
                if (err == -ENOMEM)
                        report_enomem();

                goto kill;
        }

        *pte = pte_mkuptodate(*pte);

        return;

kill:
        printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
        force_sig(SIGKILL, current);
}

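/* Plain page table lookup helpers; addr_pte() walks all levels for a task. */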
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
        return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
        return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
        return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
        return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(task->mm, addr);
        pud_t *pud = pud_offset(pgd, addr);
        pmd_t *pmd = pmd_offset(pud, addr);

        return pte_offset_map(pmd, addr);
}

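/*
 * The flush_tlb_* entry points below translate the generic TLB flush API
 * into either a kernel-range flush or a fix_range_common() pass over the
 * affected user mappings.
 */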
void flush_tlb_all(void)
{
        flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
        flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
        flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
{
        if (vma->vm_mm == NULL)
                flush_tlb_kernel_range_common(start, end);
        else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end)
{
        /*
         * Don't bother flushing if this address space is about to be
         * destroyed.
         */
        if (atomic_read(&mm->mm_users) == 0)
                return;

        fix_range(mm, start, end, 0);
}

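/*
 * flush_tlb_mm() walks every VMA of the mm and syncs it; force_flush_all()
 * does the same for the current mm but with force set, re-issuing even
 * ranges that are not marked new.
 */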
void flush_tlb_mm(struct mm_struct *mm)
{
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 0);
                vma = vma->vm_next;
        }
}

void force_flush_all(void)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 1);
                vma = vma->vm_next;
        }
}