These changes are a raw update to a vanilla kernel 4.1.10, with the
[kvmfornfv.git] kernel/drivers/gpu/drm/i915/i915_gem_execbuffer.c
1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35 #include <linux/uaccess.h>
36
37 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
38 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
39 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
40 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
41 #define  __EXEC_OBJECT_PURGEABLE (1<<27)
42
43 #define BATCH_OFFSET_BIAS (256*1024)
44
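/*
 * eb_vmas tracks the vmas referenced by a single execbuffer call.  "and"
 * is negative (minus the buffer count) when the handles form a dense LUT
 * indexed by buffer position, or holds the hash mask when handles are
 * hashed into "buckets"; see eb_create() and eb_get_vma() below.
 */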
45 struct eb_vmas {
46         struct list_head vmas;
47         int and;
48         union {
49                 struct i915_vma *lut[0];
50                 struct hlist_head buckets[0];
51         };
52 };
53
54 static struct eb_vmas *
55 eb_create(struct drm_i915_gem_execbuffer2 *args)
56 {
57         struct eb_vmas *eb = NULL;
58
59         if (args->flags & I915_EXEC_HANDLE_LUT) {
60                 unsigned size = args->buffer_count;
61                 size *= sizeof(struct i915_vma *);
62                 size += sizeof(struct eb_vmas);
63                 eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
64         }
65
66         if (eb == NULL) {
67                 unsigned size = args->buffer_count;
68                 unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
69                 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
70                 while (count > 2*size)
71                         count >>= 1;
72                 eb = kzalloc(count*sizeof(struct hlist_head) +
73                              sizeof(struct eb_vmas),
74                              GFP_TEMPORARY);
75                 if (eb == NULL)
76                         return eb;
77
78                 eb->and = count - 1;
79         } else
80                 eb->and = -args->buffer_count;
81
82         INIT_LIST_HEAD(&eb->vmas);
83         return eb;
84 }
85
86 static void
87 eb_reset(struct eb_vmas *eb)
88 {
89         if (eb->and >= 0)
90                 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
91 }
92
93 static int
94 eb_lookup_vmas(struct eb_vmas *eb,
95                struct drm_i915_gem_exec_object2 *exec,
96                const struct drm_i915_gem_execbuffer2 *args,
97                struct i915_address_space *vm,
98                struct drm_file *file)
99 {
100         struct drm_i915_gem_object *obj;
101         struct list_head objects;
102         int i, ret;
103
104         INIT_LIST_HEAD(&objects);
105         spin_lock(&file->table_lock);
106         /* Grab a reference to the object and release the lock so we can look up
107          * or create the VMA without using GFP_ATOMIC. */
108         for (i = 0; i < args->buffer_count; i++) {
109                 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
110                 if (obj == NULL) {
111                         spin_unlock(&file->table_lock);
112                         DRM_DEBUG("Invalid object handle %d at index %d\n",
113                                    exec[i].handle, i);
114                         ret = -ENOENT;
115                         goto err;
116                 }
117
118                 if (!list_empty(&obj->obj_exec_link)) {
119                         spin_unlock(&file->table_lock);
120                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
121                                    obj, exec[i].handle, i);
122                         ret = -EINVAL;
123                         goto err;
124                 }
125
126                 drm_gem_object_reference(&obj->base);
127                 list_add_tail(&obj->obj_exec_link, &objects);
128         }
129         spin_unlock(&file->table_lock);
130
131         i = 0;
132         while (!list_empty(&objects)) {
133                 struct i915_vma *vma;
134
135                 obj = list_first_entry(&objects,
136                                        struct drm_i915_gem_object,
137                                        obj_exec_link);
138
139                 /*
140                  * NOTE: We can leak any vmas created here when something fails
141                  * later on. But that's no issue since vma_unbind can deal with
142                  * vmas which are not actually bound. And since only
143                  * lookup_or_create exists as an interface to get at the vma
144                  * from the (obj, vm) we don't run the risk of creating
145                  * from the (obj, vm) pair, we don't run the risk of creating
146                  */
147                 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
148                 if (IS_ERR(vma)) {
149                         DRM_DEBUG("Failed to lookup VMA\n");
150                         ret = PTR_ERR(vma);
151                         goto err;
152                 }
153
154                 /* Transfer ownership from the objects list to the vmas list. */
155                 list_add_tail(&vma->exec_list, &eb->vmas);
156                 list_del_init(&obj->obj_exec_link);
157
158                 vma->exec_entry = &exec[i];
159                 if (eb->and < 0) {
160                         eb->lut[i] = vma;
161                 } else {
162                         uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
163                         vma->exec_handle = handle;
164                         hlist_add_head(&vma->exec_node,
165                                        &eb->buckets[handle & eb->and]);
166                 }
167                 ++i;
168         }
169
170         return 0;
171
172
173 err:
174         while (!list_empty(&objects)) {
175                 obj = list_first_entry(&objects,
176                                        struct drm_i915_gem_object,
177                                        obj_exec_link);
178                 list_del_init(&obj->obj_exec_link);
179                 drm_gem_object_unreference(&obj->base);
180         }
181         /*
182          * Objects already transferred to the vmas list will be unreferenced by
183          * eb_destroy.
184          */
185
186         return ret;
187 }
188
189 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
190 {
191         if (eb->and < 0) {
192                 if (handle >= -eb->and)
193                         return NULL;
194                 return eb->lut[handle];
195         } else {
196                 struct hlist_head *head;
197                 struct hlist_node *node;
198
199                 head = &eb->buckets[handle & eb->and];
200                 hlist_for_each(node, head) {
201                         struct i915_vma *vma;
202
203                         vma = hlist_entry(node, struct i915_vma, exec_node);
204                         if (vma->exec_handle == handle)
205                                 return vma;
206                 }
207                 return NULL;
208         }
209 }
210
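/*
 * Undo the transient state taken while reserving a vma for execbuf: drop
 * the fence and pin references, let a shadow batch become purgeable again,
 * and clear the internal __EXEC_OBJECT flags.
 */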
211 static void
212 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
213 {
214         struct drm_i915_gem_exec_object2 *entry;
215         struct drm_i915_gem_object *obj = vma->obj;
216
217         if (!drm_mm_node_allocated(&vma->node))
218                 return;
219
220         entry = vma->exec_entry;
221
222         if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
223                 i915_gem_object_unpin_fence(obj);
224
225         if (entry->flags & __EXEC_OBJECT_HAS_PIN)
226                 vma->pin_count--;
227
228         if (entry->flags & __EXEC_OBJECT_PURGEABLE)
229                 obj->madv = I915_MADV_DONTNEED;
230
231         entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
232                           __EXEC_OBJECT_HAS_PIN |
233                           __EXEC_OBJECT_PURGEABLE);
234 }
235
236 static void eb_destroy(struct eb_vmas *eb)
237 {
238         while (!list_empty(&eb->vmas)) {
239                 struct i915_vma *vma;
240
241                 vma = list_first_entry(&eb->vmas,
242                                        struct i915_vma,
243                                        exec_list);
244                 list_del_init(&vma->exec_list);
245                 i915_gem_execbuffer_unreserve_vma(vma);
246                 drm_gem_object_unreference(&vma->obj->base);
247         }
248         kfree(eb);
249 }
250
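/*
 * Relocations can be written through the CPU when the write will be
 * coherent with the GPU: on LLC platforms, when the object is already in
 * the CPU write domain, or when the object is snooped/cached.
 */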
251 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
252 {
253         return (HAS_LLC(obj->base.dev) ||
254                 obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
255                 obj->cache_level != I915_CACHE_NONE);
256 }
257
258 static int
259 relocate_entry_cpu(struct drm_i915_gem_object *obj,
260                    struct drm_i915_gem_relocation_entry *reloc,
261                    uint64_t target_offset)
262 {
263         struct drm_device *dev = obj->base.dev;
264         uint32_t page_offset = offset_in_page(reloc->offset);
265         uint64_t delta = reloc->delta + target_offset;
266         char *vaddr;
267         int ret;
268
269         ret = i915_gem_object_set_to_cpu_domain(obj, true);
270         if (ret)
271                 return ret;
272
273         vaddr = kmap_atomic(i915_gem_object_get_page(obj,
274                                 reloc->offset >> PAGE_SHIFT));
275         *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
276
277         if (INTEL_INFO(dev)->gen >= 8) {
278                 page_offset = offset_in_page(page_offset + sizeof(uint32_t));
279
280                 if (page_offset == 0) {
281                         kunmap_atomic(vaddr);
282                         vaddr = kmap_atomic(i915_gem_object_get_page(obj,
283                             (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
284                 }
285
286                 *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
287         }
288
289         kunmap_atomic(vaddr);
290
291         return 0;
292 }
293
294 static int
295 relocate_entry_gtt(struct drm_i915_gem_object *obj,
296                    struct drm_i915_gem_relocation_entry *reloc,
297                    uint64_t target_offset)
298 {
299         struct drm_device *dev = obj->base.dev;
300         struct drm_i915_private *dev_priv = dev->dev_private;
301         uint64_t delta = reloc->delta + target_offset;
302         uint64_t offset;
303         void __iomem *reloc_page;
304         int ret;
305
306         ret = i915_gem_object_set_to_gtt_domain(obj, true);
307         if (ret)
308                 return ret;
309
310         ret = i915_gem_object_put_fence(obj);
311         if (ret)
312                 return ret;
313
314         /* Map the page containing the relocation we're going to perform.  */
315         offset = i915_gem_obj_ggtt_offset(obj);
316         offset += reloc->offset;
317         reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
318                                               offset & PAGE_MASK);
319         iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
320
321         if (INTEL_INFO(dev)->gen >= 8) {
322                 offset += sizeof(uint32_t);
323
324                 if (offset_in_page(offset) == 0) {
325                         io_mapping_unmap_atomic(reloc_page);
326                         reloc_page =
327                                 io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
328                                                          offset);
329                 }
330
331                 iowrite32(upper_32_bits(delta),
332                           reloc_page + offset_in_page(offset));
333         }
334
335         io_mapping_unmap_atomic(reloc_page);
336
337         return 0;
338 }
339
340 static void
341 clflush_write32(void *addr, uint32_t value)
342 {
343         /* This is not a fast path, so KISS. */
344         drm_clflush_virt_range(addr, sizeof(uint32_t));
345         *(uint32_t *)addr = value;
346         drm_clflush_virt_range(addr, sizeof(uint32_t));
347 }
348
349 static int
350 relocate_entry_clflush(struct drm_i915_gem_object *obj,
351                        struct drm_i915_gem_relocation_entry *reloc,
352                        uint64_t target_offset)
353 {
354         struct drm_device *dev = obj->base.dev;
355         uint32_t page_offset = offset_in_page(reloc->offset);
356         uint64_t delta = (int)reloc->delta + target_offset;
357         char *vaddr;
358         int ret;
359
360         ret = i915_gem_object_set_to_gtt_domain(obj, true);
361         if (ret)
362                 return ret;
363
364         vaddr = kmap_atomic(i915_gem_object_get_page(obj,
365                                 reloc->offset >> PAGE_SHIFT));
366         clflush_write32(vaddr + page_offset, lower_32_bits(delta));
367
368         if (INTEL_INFO(dev)->gen >= 8) {
369                 page_offset = offset_in_page(page_offset + sizeof(uint32_t));
370
371                 if (page_offset == 0) {
372                         kunmap_atomic(vaddr);
373                         vaddr = kmap_atomic(i915_gem_object_get_page(obj,
374                             (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
375                 }
376
377                 clflush_write32(vaddr + page_offset, upper_32_bits(delta));
378         }
379
380         kunmap_atomic(vaddr);
381
382         return 0;
383 }
384
385 static int
386 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
387                                    struct eb_vmas *eb,
388                                    struct drm_i915_gem_relocation_entry *reloc)
389 {
390         struct drm_device *dev = obj->base.dev;
391         struct drm_gem_object *target_obj;
392         struct drm_i915_gem_object *target_i915_obj;
393         struct i915_vma *target_vma;
394         uint64_t target_offset;
395         int ret;
396
397         /* we already hold a reference to all valid objects */
398         target_vma = eb_get_vma(eb, reloc->target_handle);
399         if (unlikely(target_vma == NULL))
400                 return -ENOENT;
401         target_i915_obj = target_vma->obj;
402         target_obj = &target_vma->obj->base;
403
404         target_offset = target_vma->node.start;
405
406         /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
407          * pipe_control writes because the gpu doesn't properly redirect them
408          * through the ppgtt for non-secure batchbuffers. */
409         if (unlikely(IS_GEN6(dev) &&
410             reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
411             !(target_vma->bound & GLOBAL_BIND))) {
412                 ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
413                                     GLOBAL_BIND);
414                 if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
415                         return ret;
416         }
417
418         /* Validate that the target is in a valid r/w GPU domain */
419         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
420                 DRM_DEBUG("reloc with multiple write domains: "
421                           "obj %p target %d offset %d "
422                           "read %08x write %08x",
423                           obj, reloc->target_handle,
424                           (int) reloc->offset,
425                           reloc->read_domains,
426                           reloc->write_domain);
427                 return -EINVAL;
428         }
429         if (unlikely((reloc->write_domain | reloc->read_domains)
430                      & ~I915_GEM_GPU_DOMAINS)) {
431                 DRM_DEBUG("reloc with read/write non-GPU domains: "
432                           "obj %p target %d offset %d "
433                           "read %08x write %08x",
434                           obj, reloc->target_handle,
435                           (int) reloc->offset,
436                           reloc->read_domains,
437                           reloc->write_domain);
438                 return -EINVAL;
439         }
440
441         target_obj->pending_read_domains |= reloc->read_domains;
442         target_obj->pending_write_domain |= reloc->write_domain;
443
444         /* If the relocation already has the right value in it, no
445          * more work needs to be done.
446          */
447         if (target_offset == reloc->presumed_offset)
448                 return 0;
449
450         /* Check that the relocation address is valid... */
451         if (unlikely(reloc->offset >
452                 obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
453                 DRM_DEBUG("Relocation beyond object bounds: "
454                           "obj %p target %d offset %d size %d.\n",
455                           obj, reloc->target_handle,
456                           (int) reloc->offset,
457                           (int) obj->base.size);
458                 return -EINVAL;
459         }
460         if (unlikely(reloc->offset & 3)) {
461                 DRM_DEBUG("Relocation not 4-byte aligned: "
462                           "obj %p target %d offset %d.\n",
463                           obj, reloc->target_handle,
464                           (int) reloc->offset);
465                 return -EINVAL;
466         }
467
468         /* We can't wait for rendering with pagefaults disabled */
469         if (obj->active && pagefault_disabled())
470                 return -EFAULT;
471
472         if (use_cpu_reloc(obj))
473                 ret = relocate_entry_cpu(obj, reloc, target_offset);
474         else if (obj->map_and_fenceable)
475                 ret = relocate_entry_gtt(obj, reloc, target_offset);
476         else if (cpu_has_clflush)
477                 ret = relocate_entry_clflush(obj, reloc, target_offset);
478         else {
479                 WARN_ONCE(1, "Impossible case in relocation handling\n");
480                 ret = -ENODEV;
481         }
482
483         if (ret)
484                 return ret;
485
486         /* and update the user's relocation entry */
487         reloc->presumed_offset = target_offset;
488
489         return 0;
490 }
491
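/*
 * For reference, a relocation entry as consumed above is filled by
 * userspace roughly as in the sketch below.  The field names come from the
 * uapi struct used in this file; target_bo_handle, batch_dword and
 * last_seen_offset are placeholder values for illustration only.
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle   = target_bo_handle,	// handle looked up via eb_get_vma()
 *		.offset          = 4 * batch_dword,	// where in the batch to patch
 *		.delta           = 0,			// added to the target's GPU address
 *		.presumed_offset = last_seen_offset,	// write is skipped if still accurate
 *		.read_domains    = I915_GEM_DOMAIN_RENDER,
 *		.write_domain    = 0,
 *	};
 */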
492 static int
493 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
494                                  struct eb_vmas *eb)
495 {
496 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
497         struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
498         struct drm_i915_gem_relocation_entry __user *user_relocs;
499         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
500         int remain, ret;
501
502         user_relocs = to_user_ptr(entry->relocs_ptr);
503
504         remain = entry->relocation_count;
505         while (remain) {
506                 struct drm_i915_gem_relocation_entry *r = stack_reloc;
507                 int count = remain;
508                 if (count > ARRAY_SIZE(stack_reloc))
509                         count = ARRAY_SIZE(stack_reloc);
510                 remain -= count;
511
512                 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
513                         return -EFAULT;
514
515                 do {
516                         u64 offset = r->presumed_offset;
517
518                         ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
519                         if (ret)
520                                 return ret;
521
522                         if (r->presumed_offset != offset &&
523                             __copy_to_user_inatomic(&user_relocs->presumed_offset,
524                                                     &r->presumed_offset,
525                                                     sizeof(r->presumed_offset))) {
526                                 return -EFAULT;
527                         }
528
529                         user_relocs++;
530                         r++;
531                 } while (--count);
532         }
533
534         return 0;
535 #undef N_RELOC
536 }
537
538 static int
539 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
540                                       struct eb_vmas *eb,
541                                       struct drm_i915_gem_relocation_entry *relocs)
542 {
543         const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
544         int i, ret;
545
546         for (i = 0; i < entry->relocation_count; i++) {
547                 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
548                 if (ret)
549                         return ret;
550         }
551
552         return 0;
553 }
554
555 static int
556 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
557 {
558         struct i915_vma *vma;
559         int ret = 0;
560
561         /* This is the fast path and we cannot handle a pagefault whilst
562          * holding the struct mutex lest the user pass in the relocations
563          * contained within a mmapped bo. In such a case, the page
564          * fault handler would call i915_gem_fault() and we would try to
565          * acquire the struct mutex again. Obviously this is bad and so
566          * lockdep complains vehemently.
567          */
568         pagefault_disable();
569         list_for_each_entry(vma, &eb->vmas, exec_list) {
570                 ret = i915_gem_execbuffer_relocate_vma(vma, eb);
571                 if (ret)
572                         break;
573         }
574         pagefault_enable();
575
576         return ret;
577 }
578
579 static bool only_mappable_for_reloc(unsigned int flags)
580 {
581         return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
582                 __EXEC_OBJECT_NEEDS_MAP;
583 }
584
585 static int
586 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
587                                 struct intel_engine_cs *ring,
588                                 bool *need_reloc)
589 {
590         struct drm_i915_gem_object *obj = vma->obj;
591         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
592         uint64_t flags;
593         int ret;
594
595         flags = 0;
596         if (!drm_mm_node_allocated(&vma->node)) {
597                 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
598                         flags |= PIN_GLOBAL | PIN_MAPPABLE;
599                 if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
600                         flags |= PIN_GLOBAL;
601                 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
602                         flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
603         }
604
605         ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
606         if ((ret == -ENOSPC  || ret == -E2BIG) &&
607             only_mappable_for_reloc(entry->flags))
608                 ret = i915_gem_object_pin(obj, vma->vm,
609                                           entry->alignment,
610                                           flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
611         if (ret)
612                 return ret;
613
614         entry->flags |= __EXEC_OBJECT_HAS_PIN;
615
616         if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
617                 ret = i915_gem_object_get_fence(obj);
618                 if (ret)
619                         return ret;
620
621                 if (i915_gem_object_pin_fence(obj))
622                         entry->flags |= __EXEC_OBJECT_HAS_FENCE;
623         }
624
625         if (entry->offset != vma->node.start) {
626                 entry->offset = vma->node.start;
627                 *need_reloc = true;
628         }
629
630         if (entry->flags & EXEC_OBJECT_WRITE) {
631                 obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
632                 obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
633         }
634
635         return 0;
636 }
637
638 static bool
639 need_reloc_mappable(struct i915_vma *vma)
640 {
641         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
642
643         if (entry->relocation_count == 0)
644                 return false;
645
646         if (!i915_is_ggtt(vma->vm))
647                 return false;
648
649         /* See also use_cpu_reloc() */
650         if (HAS_LLC(vma->obj->base.dev))
651                 return false;
652
653         if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
654                 return false;
655
656         return true;
657 }
658
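/*
 * Check whether an already-bound vma satisfies the placement constraints
 * requested for this execbuf (alignment, batch bias, mappability); if it
 * does not, it will be unbound and reserved again.
 */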
659 static bool
660 eb_vma_misplaced(struct i915_vma *vma)
661 {
662         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
663         struct drm_i915_gem_object *obj = vma->obj;
664
665         WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
666                !i915_is_ggtt(vma->vm));
667
668         if (entry->alignment &&
669             vma->node.start & (entry->alignment - 1))
670                 return true;
671
672         if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
673             vma->node.start < BATCH_OFFSET_BIAS)
674                 return true;
675
676         /* avoid costly ping-pong once a batch bo ended up non-mappable */
677         if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
678                 return !only_mappable_for_reloc(entry->flags);
679
680         return false;
681 }
682
683 static int
684 i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
685                             struct list_head *vmas,
686                             bool *need_relocs)
687 {
688         struct drm_i915_gem_object *obj;
689         struct i915_vma *vma;
690         struct i915_address_space *vm;
691         struct list_head ordered_vmas;
692         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
693         int retry;
694
695         i915_gem_retire_requests_ring(ring);
696
697         vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
698
699         INIT_LIST_HEAD(&ordered_vmas);
700         while (!list_empty(vmas)) {
701                 struct drm_i915_gem_exec_object2 *entry;
702                 bool need_fence, need_mappable;
703
704                 vma = list_first_entry(vmas, struct i915_vma, exec_list);
705                 obj = vma->obj;
706                 entry = vma->exec_entry;
707
708                 if (!has_fenced_gpu_access)
709                         entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
710                 need_fence =
711                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
712                         obj->tiling_mode != I915_TILING_NONE;
713                 need_mappable = need_fence || need_reloc_mappable(vma);
714
715                 if (need_mappable) {
716                         entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
717                         list_move(&vma->exec_list, &ordered_vmas);
718                 } else
719                         list_move_tail(&vma->exec_list, &ordered_vmas);
720
721                 obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
722                 obj->base.pending_write_domain = 0;
723         }
724         list_splice(&ordered_vmas, vmas);
725
726         /* Attempt to pin all of the buffers into the GTT.
727          * This is done in 3 phases:
728          *
729          * 1a. Unbind all objects that do not match the GTT constraints for
730          *     the execbuffer (fenceable, mappable, alignment etc).
731          * 1b. Increment pin count for already bound objects.
732          * 2.  Bind new objects.
733          * 3.  Decrement pin count.
734          *
735          * This avoids unnecessary unbinding of later objects in order to make
736          * room for the earlier objects *unless* we need to defragment.
737          */
738         retry = 0;
739         do {
740                 int ret = 0;
741
742                 /* Unbind any ill-fitting objects or pin. */
743                 list_for_each_entry(vma, vmas, exec_list) {
744                         if (!drm_mm_node_allocated(&vma->node))
745                                 continue;
746
747                         if (eb_vma_misplaced(vma))
748                                 ret = i915_vma_unbind(vma);
749                         else
750                                 ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
751                         if (ret)
752                                 goto err;
753                 }
754
755                 /* Bind fresh objects */
756                 list_for_each_entry(vma, vmas, exec_list) {
757                         if (drm_mm_node_allocated(&vma->node))
758                                 continue;
759
760                         ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
761                         if (ret)
762                                 goto err;
763                 }
764
765 err:
766                 if (ret != -ENOSPC || retry++)
767                         return ret;
768
769                 /* Decrement pin count for bound objects */
770                 list_for_each_entry(vma, vmas, exec_list)
771                         i915_gem_execbuffer_unreserve_vma(vma);
772
773                 ret = i915_gem_evict_vm(vm, true);
774                 if (ret)
775                         return ret;
776         } while (1);
777 }
778
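/*
 * Slow path: used when the atomic copies in the fast path fault.  All vmas
 * are released, struct_mutex is dropped so the relocation arrays can be
 * copied from userspace with pagefaults enabled, and then the objects are
 * looked up and reserved again before relocating from the kernel copy.
 */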
779 static int
780 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
781                                   struct drm_i915_gem_execbuffer2 *args,
782                                   struct drm_file *file,
783                                   struct intel_engine_cs *ring,
784                                   struct eb_vmas *eb,
785                                   struct drm_i915_gem_exec_object2 *exec)
786 {
787         struct drm_i915_gem_relocation_entry *reloc;
788         struct i915_address_space *vm;
789         struct i915_vma *vma;
790         bool need_relocs;
791         int *reloc_offset;
792         int i, total, ret;
793         unsigned count = args->buffer_count;
794
795         vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
796
797         /* We may process another execbuffer during the unlock... */
798         while (!list_empty(&eb->vmas)) {
799                 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
800                 list_del_init(&vma->exec_list);
801                 i915_gem_execbuffer_unreserve_vma(vma);
802                 drm_gem_object_unreference(&vma->obj->base);
803         }
804
805         mutex_unlock(&dev->struct_mutex);
806
807         total = 0;
808         for (i = 0; i < count; i++)
809                 total += exec[i].relocation_count;
810
811         reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
812         reloc = drm_malloc_ab(total, sizeof(*reloc));
813         if (reloc == NULL || reloc_offset == NULL) {
814                 drm_free_large(reloc);
815                 drm_free_large(reloc_offset);
816                 mutex_lock(&dev->struct_mutex);
817                 return -ENOMEM;
818         }
819
820         total = 0;
821         for (i = 0; i < count; i++) {
822                 struct drm_i915_gem_relocation_entry __user *user_relocs;
823                 u64 invalid_offset = (u64)-1;
824                 int j;
825
826                 user_relocs = to_user_ptr(exec[i].relocs_ptr);
827
828                 if (copy_from_user(reloc+total, user_relocs,
829                                    exec[i].relocation_count * sizeof(*reloc))) {
830                         ret = -EFAULT;
831                         mutex_lock(&dev->struct_mutex);
832                         goto err;
833                 }
834
835                 /* As we do not update the known relocation offsets after
836                  * relocating (due to the complexities in lock handling),
837                  * we need to mark them as invalid now so that we force the
838                  * relocation processing next time. Just in case the target
839                  * object is evicted and then rebound into its old
840                  * presumed_offset before the next execbuffer - if that
841                  * happened we would make the mistake of assuming that the
842                  * relocations were valid.
843                  */
844                 for (j = 0; j < exec[i].relocation_count; j++) {
845                         if (__copy_to_user(&user_relocs[j].presumed_offset,
846                                            &invalid_offset,
847                                            sizeof(invalid_offset))) {
848                                 ret = -EFAULT;
849                                 mutex_lock(&dev->struct_mutex);
850                                 goto err;
851                         }
852                 }
853
854                 reloc_offset[i] = total;
855                 total += exec[i].relocation_count;
856         }
857
858         ret = i915_mutex_lock_interruptible(dev);
859         if (ret) {
860                 mutex_lock(&dev->struct_mutex);
861                 goto err;
862         }
863
864         /* reacquire the objects */
865         eb_reset(eb);
866         ret = eb_lookup_vmas(eb, exec, args, vm, file);
867         if (ret)
868                 goto err;
869
870         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
871         ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
872         if (ret)
873                 goto err;
874
875         list_for_each_entry(vma, &eb->vmas, exec_list) {
876                 int offset = vma->exec_entry - exec;
877                 ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
878                                                             reloc + reloc_offset[offset]);
879                 if (ret)
880                         goto err;
881         }
882
883         /* Leave the user relocations as they are; this is the painfully slow path,
884          * and we want to avoid the complication of dropping the lock whilst
885          * having buffers reserved in the aperture and so causing spurious
886          * ENOSPC for random operations.
887          */
888
889 err:
890         drm_free_large(reloc);
891         drm_free_large(reloc_offset);
892         return ret;
893 }
894
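/*
 * Flush outstanding CPU writes and synchronise each object with the ring
 * before the batch runs, then invalidate the GPU caches.
 */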
895 static int
896 i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
897                                 struct list_head *vmas)
898 {
899         struct i915_vma *vma;
900         uint32_t flush_domains = 0;
901         bool flush_chipset = false;
902         int ret;
903
904         list_for_each_entry(vma, vmas, exec_list) {
905                 struct drm_i915_gem_object *obj = vma->obj;
906                 ret = i915_gem_object_sync(obj, ring);
907                 if (ret)
908                         return ret;
909
910                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
911                         flush_chipset |= i915_gem_clflush_object(obj, false);
912
913                 flush_domains |= obj->base.write_domain;
914         }
915
916         if (flush_chipset)
917                 i915_gem_chipset_flush(ring->dev);
918
919         if (flush_domains & I915_GEM_DOMAIN_GTT)
920                 wmb();
921
922         /* Unconditionally invalidate gpu caches and ensure that we do flush
923          * any residual writes from the previous batch.
924          */
925         return intel_ring_invalidate_all_caches(ring);
926 }
927
928 static bool
929 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
930 {
931         if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
932                 return false;
933
934         return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
935 }
936
937 static int
938 validate_exec_list(struct drm_device *dev,
939                    struct drm_i915_gem_exec_object2 *exec,
940                    int count)
941 {
942         unsigned relocs_total = 0;
943         unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
944         unsigned invalid_flags;
945         int i;
946
947         invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
948         if (USES_FULL_PPGTT(dev))
949                 invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
950
951         for (i = 0; i < count; i++) {
952                 char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
953                 int length; /* limited by fault_in_pages_readable() */
954
955                 if (exec[i].flags & invalid_flags)
956                         return -EINVAL;
957
958                 /* First check for malicious input causing overflow in
959                  * the worst case where we need to allocate the entire
960                  * relocation tree as a single array.
961                  */
962                 if (exec[i].relocation_count > relocs_max - relocs_total)
963                         return -EINVAL;
964                 relocs_total += exec[i].relocation_count;
965
966                 length = exec[i].relocation_count *
967                         sizeof(struct drm_i915_gem_relocation_entry);
968                 /*
969                  * We must check that the entire relocation array is safe
970                  * to read, but since we may need to update the presumed
971                  * offsets during execution, check for full write access.
972                  */
973                 if (!access_ok(VERIFY_WRITE, ptr, length))
974                         return -EFAULT;
975
976                 if (likely(!i915.prefault_disable)) {
977                         if (fault_in_multipages_readable(ptr, length))
978                                 return -EFAULT;
979                 }
980         }
981
982         return 0;
983 }
984
985 static struct intel_context *
986 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
987                           struct intel_engine_cs *ring, const u32 ctx_id)
988 {
989         struct intel_context *ctx = NULL;
990         struct i915_ctx_hang_stats *hs;
991
992         if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
993                 return ERR_PTR(-EINVAL);
994
995         ctx = i915_gem_context_get(file->driver_priv, ctx_id);
996         if (IS_ERR(ctx))
997                 return ctx;
998
999         hs = &ctx->hang_stats;
1000         if (hs->banned) {
1001                 DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1002                 return ERR_PTR(-EIO);
1003         }
1004
1005         if (i915.enable_execlists && !ctx->engine[ring->id].state) {
1006                 int ret = intel_lr_context_deferred_create(ctx, ring);
1007                 if (ret) {
1008                         DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
1009                         return ERR_PTR(ret);
1010                 }
1011         }
1012
1013         return ctx;
1014 }
1015
1016 void
1017 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1018                                    struct intel_engine_cs *ring)
1019 {
1020         struct drm_i915_gem_request *req = intel_ring_get_request(ring);
1021         struct i915_vma *vma;
1022
1023         list_for_each_entry(vma, vmas, exec_list) {
1024                 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
1025                 struct drm_i915_gem_object *obj = vma->obj;
1026                 u32 old_read = obj->base.read_domains;
1027                 u32 old_write = obj->base.write_domain;
1028
1029                 obj->dirty = 1; /* be paranoid  */
1030                 obj->base.write_domain = obj->base.pending_write_domain;
1031                 if (obj->base.write_domain == 0)
1032                         obj->base.pending_read_domains |= obj->base.read_domains;
1033                 obj->base.read_domains = obj->base.pending_read_domains;
1034
1035                 i915_vma_move_to_active(vma, ring);
1036                 if (obj->base.write_domain) {
1037                         i915_gem_request_assign(&obj->last_write_req, req);
1038
1039                         intel_fb_obj_invalidate(obj, ring, ORIGIN_CS);
1040
1041                         /* update for the implicit flush after a batch */
1042                         obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1043                 }
1044                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
1045                         i915_gem_request_assign(&obj->last_fenced_req, req);
1046                         if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
1047                                 struct drm_i915_private *dev_priv = to_i915(ring->dev);
1048                                 list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1049                                                &dev_priv->mm.fence_list);
1050                         }
1051                 }
1052
1053                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
1054         }
1055 }
1056
1057 void
1058 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
1059                                     struct drm_file *file,
1060                                     struct intel_engine_cs *ring,
1061                                     struct drm_i915_gem_object *obj)
1062 {
1063         /* Unconditionally force add_request to emit a full flush. */
1064         ring->gpu_caches_dirty = true;
1065
1066         /* Add a breadcrumb for the completion of the batch buffer */
1067         (void)__i915_add_request(ring, file, obj);
1068 }
1069
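/*
 * I915_EXEC_GEN7_SOL_RESET: zero the four GEN7 streamout write offsets
 * from the ring before the batch starts; only valid on the gen7 render
 * ring.
 */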
1070 static int
1071 i915_reset_gen7_sol_offsets(struct drm_device *dev,
1072                             struct intel_engine_cs *ring)
1073 {
1074         struct drm_i915_private *dev_priv = dev->dev_private;
1075         int ret, i;
1076
1077         if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1078                 DRM_DEBUG("sol reset is gen7/rcs only\n");
1079                 return -EINVAL;
1080         }
1081
1082         ret = intel_ring_begin(ring, 4 * 3);
1083         if (ret)
1084                 return ret;
1085
1086         for (i = 0; i < 4; i++) {
1087                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1088                 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1089                 intel_ring_emit(ring, 0);
1090         }
1091
1092         intel_ring_advance(ring);
1093
1094         return 0;
1095 }
1096
1097 static int
1098 i915_emit_box(struct intel_engine_cs *ring,
1099               struct drm_clip_rect *box,
1100               int DR1, int DR4)
1101 {
1102         int ret;
1103
1104         if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
1105             box->y2 <= 0 || box->x2 <= 0) {
1106                 DRM_ERROR("Bad box %d,%d..%d,%d\n",
1107                           box->x1, box->y1, box->x2, box->y2);
1108                 return -EINVAL;
1109         }
1110
1111         if (INTEL_INFO(ring->dev)->gen >= 4) {
1112                 ret = intel_ring_begin(ring, 4);
1113                 if (ret)
1114                         return ret;
1115
1116                 intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965);
1117                 intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
1118                 intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
1119                 intel_ring_emit(ring, DR4);
1120         } else {
1121                 ret = intel_ring_begin(ring, 6);
1122                 if (ret)
1123                         return ret;
1124
1125                 intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO);
1126                 intel_ring_emit(ring, DR1);
1127                 intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
1128                 intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
1129                 intel_ring_emit(ring, DR4);
1130                 intel_ring_emit(ring, 0);
1131         }
1132         intel_ring_advance(ring);
1133
1134         return 0;
1135 }
1136
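/*
 * Run the command parser over the user batch and, when it succeeds,
 * substitute a pinned shadow copy taken from the batch pool; the shadow
 * is added to the exec list so that it is retired and made purgeable
 * along with the rest of the request.
 */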
1137 static struct drm_i915_gem_object*
1138 i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
1139                           struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1140                           struct eb_vmas *eb,
1141                           struct drm_i915_gem_object *batch_obj,
1142                           u32 batch_start_offset,
1143                           u32 batch_len,
1144                           bool is_master)
1145 {
1146         struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
1147         struct drm_i915_gem_object *shadow_batch_obj;
1148         struct i915_vma *vma;
1149         int ret;
1150
1151         shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
1152                                                    PAGE_ALIGN(batch_len));
1153         if (IS_ERR(shadow_batch_obj))
1154                 return shadow_batch_obj;
1155
1156         ret = i915_parse_cmds(ring,
1157                               batch_obj,
1158                               shadow_batch_obj,
1159                               batch_start_offset,
1160                               batch_len,
1161                               is_master);
1162         if (ret)
1163                 goto err;
1164
1165         ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
1166         if (ret)
1167                 goto err;
1168
1169         memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1170
1171         vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1172         vma->exec_entry = shadow_exec_entry;
1173         vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
1174         drm_gem_object_reference(&shadow_batch_obj->base);
1175         list_add_tail(&vma->exec_list, &eb->vmas);
1176
1177         shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
1178
1179         return shadow_batch_obj;
1180
1181 err:
1182         if (ret == -EACCES) /* unhandled chained batch */
1183                 return batch_obj;
1184         else
1185                 return ERR_PTR(ret);
1186 }
1187
1188 int
1189 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
1190                                struct intel_engine_cs *ring,
1191                                struct intel_context *ctx,
1192                                struct drm_i915_gem_execbuffer2 *args,
1193                                struct list_head *vmas,
1194                                struct drm_i915_gem_object *batch_obj,
1195                                u64 exec_start, u32 dispatch_flags)
1196 {
1197         struct drm_clip_rect *cliprects = NULL;
1198         struct drm_i915_private *dev_priv = dev->dev_private;
1199         u64 exec_len;
1200         int instp_mode;
1201         u32 instp_mask;
1202         int i, ret = 0;
1203
1204         if (args->num_cliprects != 0) {
1205                 if (ring != &dev_priv->ring[RCS]) {
1206                         DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1207                         return -EINVAL;
1208                 }
1209
1210                 if (INTEL_INFO(dev)->gen >= 5) {
1211                         DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1212                         return -EINVAL;
1213                 }
1214
1215                 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1216                         DRM_DEBUG("execbuf with %u cliprects\n",
1217                                   args->num_cliprects);
1218                         return -EINVAL;
1219                 }
1220
1221                 cliprects = kcalloc(args->num_cliprects,
1222                                     sizeof(*cliprects),
1223                                     GFP_KERNEL);
1224                 if (cliprects == NULL) {
1225                         ret = -ENOMEM;
1226                         goto error;
1227                 }
1228
1229                 if (copy_from_user(cliprects,
1230                                    to_user_ptr(args->cliprects_ptr),
1231                                    sizeof(*cliprects)*args->num_cliprects)) {
1232                         ret = -EFAULT;
1233                         goto error;
1234                 }
1235         } else {
1236                 if (args->DR4 == 0xffffffff) {
1237                         DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1238                         args->DR4 = 0;
1239                 }
1240
1241                 if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1242                         DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1243                         return -EINVAL;
1244                 }
1245         }
1246
1247         ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
1248         if (ret)
1249                 goto error;
1250
1251         ret = i915_switch_context(ring, ctx);
1252         if (ret)
1253                 goto error;
1254
1255         if (ctx->ppgtt)
1256                 WARN(ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
1257                         "%s didn't clear reload\n", ring->name);
1258         else if (dev_priv->mm.aliasing_ppgtt)
1259                 WARN(dev_priv->mm.aliasing_ppgtt->pd_dirty_rings &
1260                         (1<<ring->id), "%s didn't clear reload\n", ring->name);
1261
1262         instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1263         instp_mask = I915_EXEC_CONSTANTS_MASK;
1264         switch (instp_mode) {
1265         case I915_EXEC_CONSTANTS_REL_GENERAL:
1266         case I915_EXEC_CONSTANTS_ABSOLUTE:
1267         case I915_EXEC_CONSTANTS_REL_SURFACE:
1268                 if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1269                         DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1270                         ret = -EINVAL;
1271                         goto error;
1272                 }
1273
1274                 if (instp_mode != dev_priv->relative_constants_mode) {
1275                         if (INTEL_INFO(dev)->gen < 4) {
1276                                 DRM_DEBUG("no rel constants on pre-gen4\n");
1277                                 ret = -EINVAL;
1278                                 goto error;
1279                         }
1280
1281                         if (INTEL_INFO(dev)->gen > 5 &&
1282                             instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1283                                 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1284                                 ret = -EINVAL;
1285                                 goto error;
1286                         }
1287
1288                         /* The HW changed the meaning of this bit on gen6 */
1289                         if (INTEL_INFO(dev)->gen >= 6)
1290                                 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1291                 }
1292                 break;
1293         default:
1294                 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1295                 ret = -EINVAL;
1296                 goto error;
1297         }
1298
1299         if (ring == &dev_priv->ring[RCS] &&
1300                         instp_mode != dev_priv->relative_constants_mode) {
1301                 ret = intel_ring_begin(ring, 4);
1302                 if (ret)
1303                         goto error;
1304
1305                 intel_ring_emit(ring, MI_NOOP);
1306                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1307                 intel_ring_emit(ring, INSTPM);
1308                 intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1309                 intel_ring_advance(ring);
1310
1311                 dev_priv->relative_constants_mode = instp_mode;
1312         }
1313
1314         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1315                 ret = i915_reset_gen7_sol_offsets(dev, ring);
1316                 if (ret)
1317                         goto error;
1318         }
1319
1320         exec_len = args->batch_len;
1321         if (cliprects) {
1322                 for (i = 0; i < args->num_cliprects; i++) {
1323                         ret = i915_emit_box(ring, &cliprects[i],
1324                                             args->DR1, args->DR4);
1325                         if (ret)
1326                                 goto error;
1327
1328                         ret = ring->dispatch_execbuffer(ring,
1329                                                         exec_start, exec_len,
1330                                                         dispatch_flags);
1331                         if (ret)
1332                                 goto error;
1333                 }
1334         } else {
1335                 ret = ring->dispatch_execbuffer(ring,
1336                                                 exec_start, exec_len,
1337                                                 dispatch_flags);
1338                 if (ret)
1339                         return ret;
1340         }
1341
1342 #ifndef CONFIG_PREEMPT_RT_BASE
1343         trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
1344 #endif
1345
1346         i915_gem_execbuffer_move_to_active(vmas, ring);
1347         i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1348
1349 error:
1350         kfree(cliprects);
1351         return ret;
1352 }
1353
1354 /**
1355  * Find one BSD ring to dispatch the corresponding BSD command.
1356  * The ring ID is returned.
1357  */
1358 static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1359                                   struct drm_file *file)
1360 {
1361         struct drm_i915_private *dev_priv = dev->dev_private;
1362         struct drm_i915_file_private *file_priv = file->driver_priv;
1363
1364         /* Check whether this file_priv is already using one of the BSD rings */
1365         if (file_priv->bsd_ring)
1366                 return file_priv->bsd_ring->id;
1367         else {
1368                 /* If not, use the ping-pong mechanism to select one ring */
1369                 int ring_id;
1370
1371                 mutex_lock(&dev->struct_mutex);
1372                 if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1373                         ring_id = VCS;
1374                         dev_priv->mm.bsd_ring_dispatch_index = 1;
1375                 } else {
1376                         ring_id = VCS2;
1377                         dev_priv->mm.bsd_ring_dispatch_index = 0;
1378                 }
1379                 file_priv->bsd_ring = &dev_priv->ring[ring_id];
1380                 mutex_unlock(&dev->struct_mutex);
1381                 return ring_id;
1382         }
1383 }
1384
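/*
 * By ABI convention the batch buffer is the last object in the exec list,
 * i.e. the tail of eb->vmas.
 */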
1385 static struct drm_i915_gem_object *
1386 eb_get_batch(struct eb_vmas *eb)
1387 {
1388         struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1389
1390         /*
1391          * SNA is doing fancy tricks with compressing batch buffers, which leads
1392          * to negative relocation deltas. Usually that works out ok since the
1393          * relocated address is still positive, except when the batch is placed
1394          * very low in the GTT. Ensure this doesn't happen.
1395          *
1396          * Note that actual hangs have only been observed on gen7, but for
1397          * paranoia do it everywhere.
1398          */
1399         vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1400
1401         return vma->obj;
1402 }
1403
1404 static int
1405 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1406                        struct drm_file *file,
1407                        struct drm_i915_gem_execbuffer2 *args,
1408                        struct drm_i915_gem_exec_object2 *exec)
1409 {
1410         struct drm_i915_private *dev_priv = dev->dev_private;
1411         struct eb_vmas *eb;
1412         struct drm_i915_gem_object *batch_obj;
1413         struct drm_i915_gem_exec_object2 shadow_exec_entry;
1414         struct intel_engine_cs *ring;
1415         struct intel_context *ctx;
1416         struct i915_address_space *vm;
1417         const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1418         u64 exec_start = args->batch_start_offset;
1419         u32 dispatch_flags;
1420         int ret;
1421         bool need_relocs;
1422
1423         if (!i915_gem_check_execbuffer(args))
1424                 return -EINVAL;
1425
1426         ret = validate_exec_list(dev, exec, args->buffer_count);
1427         if (ret)
1428                 return ret;
1429
1430         dispatch_flags = 0;
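             /*
              * Secure batches execute out of the global GTT with elevated
              * privileges, so only a DRM master with CAP_SYS_ADMIN may
              * request them.
              */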
1431         if (args->flags & I915_EXEC_SECURE) {
1432                 if (!file->is_master || !capable(CAP_SYS_ADMIN))
1433                         return -EPERM;
1434
1435                 dispatch_flags |= I915_DISPATCH_SECURE;
1436         }
1437         if (args->flags & I915_EXEC_IS_PINNED)
1438                 dispatch_flags |= I915_DISPATCH_PINNED;
1439
1440         if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
1441                 DRM_DEBUG("execbuf with unknown ring: %d\n",
1442                           (int)(args->flags & I915_EXEC_RING_MASK));
1443                 return -EINVAL;
1444         }
1445
1446         if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
1447             ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1448                 DRM_DEBUG("execbuf with non-BSD ring but with BSD "
1449                         "dispatch flags set: %d\n", (int)(args->flags));
1450                 return -EINVAL;
1451         }
1452
1453         if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1454                 ring = &dev_priv->ring[RCS];
1455         else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1456                 if (HAS_BSD2(dev)) {
1457                         int ring_id;
1458
1459                         switch (args->flags & I915_EXEC_BSD_MASK) {
1460                         case I915_EXEC_BSD_DEFAULT:
1461                                 ring_id = gen8_dispatch_bsd_ring(dev, file);
1462                                 ring = &dev_priv->ring[ring_id];
1463                                 break;
1464                         case I915_EXEC_BSD_RING1:
1465                                 ring = &dev_priv->ring[VCS];
1466                                 break;
1467                         case I915_EXEC_BSD_RING2:
1468                                 ring = &dev_priv->ring[VCS2];
1469                                 break;
1470                         default:
1471                                 DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
1472                                           (int)(args->flags & I915_EXEC_BSD_MASK));
1473                                 return -EINVAL;
1474                         }
1475                 } else
1476                         ring = &dev_priv->ring[VCS];
1477         } else
1478                 ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1479
1480         if (!intel_ring_initialized(ring)) {
1481                 DRM_DEBUG("execbuf with invalid ring: %d\n",
1482                           (int)(args->flags & I915_EXEC_RING_MASK));
1483                 return -EINVAL;
1484         }
1485
1486         if (args->buffer_count < 1) {
1487                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1488                 return -EINVAL;
1489         }
1490
1491         intel_runtime_pm_get(dev_priv);
1492
1493         ret = i915_mutex_lock_interruptible(dev);
1494         if (ret)
1495                 goto pre_mutex_err;
1496
1497         ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1498         if (IS_ERR(ctx)) {
1499                 mutex_unlock(&dev->struct_mutex);
1500                 ret = PTR_ERR(ctx);
1501                 goto pre_mutex_err;
1502         }
1503
1504         i915_gem_context_reference(ctx);
1505
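             /* Use the context's ppgtt when it has one; otherwise fall back to the global GTT. */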
1506         if (ctx->ppgtt)
1507                 vm = &ctx->ppgtt->base;
1508         else
1509                 vm = &dev_priv->gtt.base;
1510
1511         eb = eb_create(args);
1512         if (eb == NULL) {
1513                 i915_gem_context_unreference(ctx);
1514                 mutex_unlock(&dev->struct_mutex);
1515                 ret = -ENOMEM;
1516                 goto pre_mutex_err;
1517         }
1518
1519         /* Look up object handles */
1520         ret = eb_lookup_vmas(eb, exec, args, vm, file);
1521         if (ret)
1522                 goto err;
1523
1524         /* take note of the batch buffer before we might reorder the lists */
1525         batch_obj = eb_get_batch(eb);
1526
1527         /* Move the objects en-masse into the GTT, evicting if necessary. */
1528         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1529         ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1530         if (ret)
1531                 goto err;
1532
1533         /* The objects are in their final locations, apply the relocations. */
1534         if (need_relocs)
1535                 ret = i915_gem_execbuffer_relocate(eb);
1536         if (ret) {
1537                 if (ret == -EFAULT) {
1538                         ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1539                                                                 eb, exec);
1540                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1541                 }
1542                 if (ret)
1543                         goto err;
1544         }
1545
1546         /* The GPU must never write to the batch buffer; reject self-modifying batches. */
1547         if (batch_obj->base.pending_write_domain) {
1548                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1549                 ret = -EINVAL;
1550                 goto err;
1551         }
1552
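             /*
              * Rings that need the command parser get a validated shadow
              * copy of the batch; the dispatch below then executes the
              * shadow instead of the userspace object.
              */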
1553         if (i915_needs_cmd_parser(ring) && args->batch_len) {
1554                 batch_obj = i915_gem_execbuffer_parse(ring,
1555                                                       &shadow_exec_entry,
1556                                                       eb,
1557                                                       batch_obj,
1558                                                       args->batch_start_offset,
1559                                                       args->batch_len,
1560                                                       file->is_master);
1561                 if (IS_ERR(batch_obj)) {
1562                         ret = PTR_ERR(batch_obj);
1563                         goto err;
1564                 }
1565
1566                 /*
1567                  * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1568                  * bit from MI_BATCH_BUFFER_START commands issued in the
1569                  * dispatch_execbuffer implementations. We specifically
1570                  * don't want that set when the command parser is
1571                  * enabled.
1572                  *
1573                  * FIXME: with aliasing ppgtt, buffers that should only
1574                  * be in ggtt still end up in the aliasing ppgtt. remove
1575                  * this check when that is fixed.
1576                  */
1577                 if (USES_FULL_PPGTT(dev))
1578                         dispatch_flags |= I915_DISPATCH_SECURE;
1579
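                     /* The shadow batch is a fresh copy, so it executes from offset 0. */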
1580                 exec_start = 0;
1581         }
1582
1583         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1584
1585         /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1586          * batch" bit. Hence we need to pin secure batches into the global gtt.
1587          * hsw should have this fixed, but bdw mucks it up again. */
1588         if (dispatch_flags & I915_DISPATCH_SECURE) {
1589                 /*
1590                  * So on first glance it looks freaky that we pin the batch here
1591                  * outside of the reservation loop. But:
1592                  * - The batch is already pinned into the relevant ppgtt, so we
1593                  *   already have the backing storage fully allocated.
1594                  * - No other BO uses the global gtt (well contexts, but meh),
1595                  *   so we don't really have issues with multiple objects not
1596                  *   fitting due to fragmentation.
1597                  * So this is actually safe.
1598                  */
1599                 ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1600                 if (ret)
1601                         goto err;
1602
1603                 exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1604         } else
1605                 exec_start += i915_gem_obj_offset(batch_obj, vm);
1606
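             /*
              * gt.do_execbuf points at the platform's submission backend
              * (legacy ringbuffer submission, or execlists where enabled).
              */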
1607         ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
1608                                       &eb->vmas, batch_obj, exec_start,
1609                                       dispatch_flags);
1610
1611         /*
1612          * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1613          * batch vma for correctness. For less ugly and less fragility this
1614          * needs to be adjusted to also track the ggtt batch vma properly as
1615          * active.
1616          */
1617         if (dispatch_flags & I915_DISPATCH_SECURE)
1618                 i915_gem_object_ggtt_unpin(batch_obj);
1619 err:
1620         /* the request owns the ref now */
1621         i915_gem_context_unreference(ctx);
1622         eb_destroy(eb);
1623
1624         mutex_unlock(&dev->struct_mutex);
1625
1626 pre_mutex_err:
1627         /* intel_gpu_busy should also get a ref, so it will free when the device
1628          * is really idle. */
1629         intel_runtime_pm_put(dev_priv);
1630         return ret;
1631 }
1632
1633 /*
1634  * Legacy execbuffer just creates an exec2 list from the original exec object
1635  * list array and passes it to the real function.
1636  */
1637 int
1638 i915_gem_execbuffer(struct drm_device *dev, void *data,
1639                     struct drm_file *file)
1640 {
1641         struct drm_i915_gem_execbuffer *args = data;
1642         struct drm_i915_gem_execbuffer2 exec2;
1643         struct drm_i915_gem_exec_object *exec_list = NULL;
1644         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1645         int ret, i;
1646
1647         if (args->buffer_count < 1) {
1648                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1649                 return -EINVAL;
1650         }
1651
1652         /* Copy in the exec list from userland */
1653         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1654         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1655         if (exec_list == NULL || exec2_list == NULL) {
1656                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1657                           args->buffer_count);
1658                 drm_free_large(exec_list);
1659                 drm_free_large(exec2_list);
1660                 return -ENOMEM;
1661         }
1662         ret = copy_from_user(exec_list,
1663                              to_user_ptr(args->buffers_ptr),
1664                              sizeof(*exec_list) * args->buffer_count);
1665         if (ret != 0) {
1666                 DRM_DEBUG("copy %d exec entries failed %d\n",
1667                           args->buffer_count, ret);
1668                 drm_free_large(exec_list);
1669                 drm_free_large(exec2_list);
1670                 return -EFAULT;
1671         }
1672
1673         for (i = 0; i < args->buffer_count; i++) {
1674                 exec2_list[i].handle = exec_list[i].handle;
1675                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1676                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1677                 exec2_list[i].alignment = exec_list[i].alignment;
1678                 exec2_list[i].offset = exec_list[i].offset;
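                     /*
                      * Pre-gen4 hardware is assumed to need a fence register
                      * for every buffer referenced from the command stream,
                      * so the legacy path requests one unconditionally.
                      */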
1679                 if (INTEL_INFO(dev)->gen < 4)
1680                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1681                 else
1682                         exec2_list[i].flags = 0;
1683         }
1684
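             /* The legacy ioctl always targets the render ring and the default context. */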
1685         exec2.buffers_ptr = args->buffers_ptr;
1686         exec2.buffer_count = args->buffer_count;
1687         exec2.batch_start_offset = args->batch_start_offset;
1688         exec2.batch_len = args->batch_len;
1689         exec2.DR1 = args->DR1;
1690         exec2.DR4 = args->DR4;
1691         exec2.num_cliprects = args->num_cliprects;
1692         exec2.cliprects_ptr = args->cliprects_ptr;
1693         exec2.flags = I915_EXEC_RENDER;
1694         i915_execbuffer2_set_context_id(exec2, 0);
1695
1696         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1697         if (!ret) {
1698                 struct drm_i915_gem_exec_object __user *user_exec_list =
1699                         to_user_ptr(args->buffers_ptr);
1700
1701                 /* Copy the new buffer offsets back to the user's exec list. */
1702                 for (i = 0; i < args->buffer_count; i++) {
1703                         ret = __copy_to_user(&user_exec_list[i].offset,
1704                                              &exec2_list[i].offset,
1705                                              sizeof(user_exec_list[i].offset));
1706                         if (ret) {
1707                                 ret = -EFAULT;
1708                                 DRM_DEBUG("failed to copy %d exec entries "
1709                                           "back to user (%d)\n",
1710                                           args->buffer_count, ret);
1711                                 break;
1712                         }
1713                 }
1714         }
1715
1716         drm_free_large(exec_list);
1717         drm_free_large(exec2_list);
1718         return ret;
1719 }
1720
1721 int
1722 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1723                      struct drm_file *file)
1724 {
1725         struct drm_i915_gem_execbuffer2 *args = data;
1726         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1727         int ret;
1728
1729         if (args->buffer_count < 1 ||
1730             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1731                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1732                 return -EINVAL;
1733         }
1734
1735         if (args->rsvd2 != 0) {
1736                 DRM_DEBUG("dirty rvsd2 field\n");
1737                 return -EINVAL;
1738         }
1739
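             /*
              * Try a cheap kmalloc first (no warning, no retry); fall back
              * to drm_malloc_ab(), which may vmalloc, for large lists.
              */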
1740         exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1741                              GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1742         if (exec2_list == NULL)
1743                 exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1744                                            args->buffer_count);
1745         if (exec2_list == NULL) {
1746                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1747                           args->buffer_count);
1748                 return -ENOMEM;
1749         }
1750         ret = copy_from_user(exec2_list,
1751                              to_user_ptr(args->buffers_ptr),
1752                              sizeof(*exec2_list) * args->buffer_count);
1753         if (ret != 0) {
1754                 DRM_DEBUG("copy %d exec entries failed %d\n",
1755                           args->buffer_count, ret);
1756                 drm_free_large(exec2_list);
1757                 return -EFAULT;
1758         }
1759
1760         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1761         if (!ret) {
1762                 /* Copy the new buffer offsets back to the user's exec list. */
1763                 struct drm_i915_gem_exec_object2 __user *user_exec_list =
1764                                    to_user_ptr(args->buffers_ptr);
1765                 int i;
1766
1767                 for (i = 0; i < args->buffer_count; i++) {
1768                         ret = __copy_to_user(&user_exec_list[i].offset,
1769                                              &exec2_list[i].offset,
1770                                              sizeof(user_exec_list[i].offset));
1771                         if (ret) {
1772                                 ret = -EFAULT;
1773                                 DRM_DEBUG("failed to copy %d exec entries "
1774                                           "back to user\n",
1775                                           args->buffer_count);
1776                                 break;
1777                         }
1778                 }
1779         }
1780
1781         drm_free_large(exec2_list);
1782         return ret;
1783 }