These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / gpu / drm / amd / amdkfd / kfd_dbgdev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
31
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
35 #include "kfd_priv.h"
36 #include "kfd_pm4_opcodes.h"
37 #include "cik_regs.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41 #include "../../radeon/cik_reg.h"
42
43 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
44 {
45         BUG_ON(!dev || !dev->kfd2kgd);
46
47         dev->kfd2kgd->address_watch_disable(dev->kgd);
48 }
49
50 static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
51                                 unsigned int pasid, uint64_t vmid0_address,
52                                 uint32_t *packet_buff, size_t size_in_bytes)
53 {
54         struct pm4__release_mem *rm_packet;
55         struct pm4__indirect_buffer_pasid *ib_packet;
56         struct kfd_mem_obj *mem_obj;
57         size_t pq_packets_size_in_bytes;
58         union ULARGE_INTEGER *largep;
59         union ULARGE_INTEGER addr;
60         struct kernel_queue *kq;
61         uint64_t *rm_state;
62         unsigned int *ib_packet_buff;
63         int status;
64
65         BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
66
67         kq = dbgdev->kq;
68
69         pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
70                                 sizeof(struct pm4__indirect_buffer_pasid);
71
72         /*
73          * We acquire a buffer from DIQ
74          * The receive packet buff will be sitting on the Indirect Buffer
75          * and in the PQ we put the IB packet + sync packet(s).
76          */
77         status = kq->ops.acquire_packet_buffer(kq,
78                                 pq_packets_size_in_bytes / sizeof(uint32_t),
79                                 &ib_packet_buff);
80         if (status != 0) {
81                 pr_err("amdkfd: acquire_packet_buffer failed\n");
82                 return status;
83         }
84
85         memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
86
87         ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
88
89         ib_packet->header.count = 3;
90         ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
91         ib_packet->header.type = PM4_TYPE_3;
92
93         largep = (union ULARGE_INTEGER *) &vmid0_address;
94
95         ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
96         ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
97
98         ib_packet->control = (1 << 23) | (1 << 31) |
99                         ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
100
101         ib_packet->bitfields5.pasid = pasid;
102
103         /*
104          * for now we use release mem for GPU-CPU synchronization
105          * Consider WaitRegMem + WriteData as a better alternative
106          * we get a GART allocations ( gpu/cpu mapping),
107          * for the sync variable, and wait until:
108          * (a) Sync with HW
109          * (b) Sync var is written by CP to mem.
110          */
111         rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
112                         (sizeof(struct pm4__indirect_buffer_pasid) /
113                                         sizeof(unsigned int)));
114
115         status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
116                                         &mem_obj);
117
118         if (status != 0) {
119                 pr_err("amdkfd: Failed to allocate GART memory\n");
120                 kq->ops.rollback_packet(kq);
121                 return status;
122         }
123
124         rm_state = (uint64_t *) mem_obj->cpu_ptr;
125
126         *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
127
128         rm_packet->header.opcode = IT_RELEASE_MEM;
129         rm_packet->header.type = PM4_TYPE_3;
130         rm_packet->header.count = sizeof(struct pm4__release_mem) /
131                                         sizeof(unsigned int) - 2;
132
133         rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
134         rm_packet->bitfields2.event_index =
135                                 event_index___release_mem__end_of_pipe;
136
137         rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
138         rm_packet->bitfields2.atc = 0;
139         rm_packet->bitfields2.tc_wb_action_ena = 1;
140
141         addr.quad_part = mem_obj->gpu_addr;
142
143         rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
144         rm_packet->address_hi = addr.u.high_part;
145
146         rm_packet->bitfields3.data_sel =
147                                 data_sel___release_mem__send_64_bit_data;
148
149         rm_packet->bitfields3.int_sel =
150                         int_sel___release_mem__send_data_after_write_confirm;
151
152         rm_packet->bitfields3.dst_sel =
153                         dst_sel___release_mem__memory_controller;
154
155         rm_packet->data_lo = QUEUESTATE__ACTIVE;
156
157         kq->ops.submit_packet(kq);
158
159         /* Wait till CP writes sync code: */
160         status = amdkfd_fence_wait_timeout(
161                         (unsigned int *) rm_state,
162                         QUEUESTATE__ACTIVE, 1500);
163
164         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
165
166         return status;
167 }
168
169 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
170 {
171         BUG_ON(!dbgdev);
172
173         /*
174          * no action is needed in this case,
175          * just make sure diq will not be used
176          */
177
178         dbgdev->kq = NULL;
179
180         return 0;
181 }
182
183 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
184 {
185         struct queue_properties properties;
186         unsigned int qid;
187         struct kernel_queue *kq = NULL;
188         int status;
189
190         BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
191
192         status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193                                 &properties, 0, KFD_QUEUE_TYPE_DIQ,
194                                 &qid);
195
196         if (status) {
197                 pr_err("amdkfd: Failed to create DIQ\n");
198                 return status;
199         }
200
201         pr_debug("DIQ Created with queue id: %d\n", qid);
202
203         kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
204
205         if (kq == NULL) {
206                 pr_err("amdkfd: Error getting DIQ\n");
207                 pqm_destroy_queue(dbgdev->pqm, qid);
208                 return -EFAULT;
209         }
210
211         dbgdev->kq = kq;
212
213         return status;
214 }
215
216 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
217 {
218         BUG_ON(!dbgdev || !dbgdev->dev);
219
220         /* disable watch address */
221         dbgdev_address_watch_disable_nodiq(dbgdev->dev);
222         return 0;
223 }
224
225 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
226 {
227         /* todo - disable address watch */
228         int status;
229
230         BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
231
232         status = pqm_destroy_queue(dbgdev->pqm,
233                         dbgdev->kq->queue->properties.queue_id);
234         dbgdev->kq = NULL;
235
236         return status;
237 }
238
239 static void dbgdev_address_watch_set_registers(
240                         const struct dbg_address_watch_info *adw_info,
241                         union TCP_WATCH_ADDR_H_BITS *addrHi,
242                         union TCP_WATCH_ADDR_L_BITS *addrLo,
243                         union TCP_WATCH_CNTL_BITS *cntl,
244                         unsigned int index, unsigned int vmid)
245 {
246         union ULARGE_INTEGER addr;
247
248         BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
249
250         addr.quad_part = 0;
251         addrHi->u32All = 0;
252         addrLo->u32All = 0;
253         cntl->u32All = 0;
254
255         if (adw_info->watch_mask != NULL)
256                 cntl->bitfields.mask =
257                         (uint32_t) (adw_info->watch_mask[index] &
258                                         ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
259         else
260                 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
261
262         addr.quad_part = (unsigned long long) adw_info->watch_address[index];
263
264         addrHi->bitfields.addr = addr.u.high_part &
265                                         ADDRESS_WATCH_REG_ADDHIGH_MASK;
266         addrLo->bitfields.addr =
267                         (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
268
269         cntl->bitfields.mode = adw_info->watch_mode[index];
270         cntl->bitfields.vmid = (uint32_t) vmid;
271         /* for now assume it is an ATC address */
272         cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
273
274         pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
275         pr_debug("\t\t%20s %08x\n", "set reg add high :",
276                         addrHi->bitfields.addr);
277         pr_debug("\t\t%20s %08x\n", "set reg add low :",
278                         addrLo->bitfields.addr);
279 }
280
281 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
282                                         struct dbg_address_watch_info *adw_info)
283 {
284         union TCP_WATCH_ADDR_H_BITS addrHi;
285         union TCP_WATCH_ADDR_L_BITS addrLo;
286         union TCP_WATCH_CNTL_BITS cntl;
287         struct kfd_process_device *pdd;
288         unsigned int i;
289
290         BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
291
292         /* taking the vmid for that process on the safe way using pdd */
293         pdd = kfd_get_process_device_data(dbgdev->dev,
294                                         adw_info->process);
295         if (!pdd) {
296                 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
297                 return -EFAULT;
298         }
299
300         addrHi.u32All = 0;
301         addrLo.u32All = 0;
302         cntl.u32All = 0;
303
304         if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
305                         (adw_info->num_watch_points == 0)) {
306                 pr_err("amdkfd: num_watch_points is invalid\n");
307                 return -EINVAL;
308         }
309
310         if ((adw_info->watch_mode == NULL) ||
311                 (adw_info->watch_address == NULL)) {
312                 pr_err("amdkfd: adw_info fields are not valid\n");
313                 return -EINVAL;
314         }
315
316         for (i = 0 ; i < adw_info->num_watch_points ; i++) {
317                 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
318                                                 &cntl, i, pdd->qpd.vmid);
319
320                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
321                 pr_debug("\t\t%20s %08x\n", "register index :", i);
322                 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
323                 pr_debug("\t\t%20s %08x\n", "Address Low is :",
324                                 addrLo.bitfields.addr);
325                 pr_debug("\t\t%20s %08x\n", "Address high is :",
326                                 addrHi.bitfields.addr);
327                 pr_debug("\t\t%20s %08x\n", "Address high is :",
328                                 addrHi.bitfields.addr);
329                 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
330                                 cntl.bitfields.mask);
331                 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
332                                 cntl.bitfields.mode);
333                 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
334                                 cntl.bitfields.vmid);
335                 pr_debug("\t\t%20s %08x\n", "Control atc  is :",
336                                 cntl.bitfields.atc);
337                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
338
339                 pdd->dev->kfd2kgd->address_watch_execute(
340                                                 dbgdev->dev->kgd,
341                                                 i,
342                                                 cntl.u32All,
343                                                 addrHi.u32All,
344                                                 addrLo.u32All);
345         }
346
347         return 0;
348 }
349
350 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
351                                         struct dbg_address_watch_info *adw_info)
352 {
353         struct pm4__set_config_reg *packets_vec;
354         union TCP_WATCH_ADDR_H_BITS addrHi;
355         union TCP_WATCH_ADDR_L_BITS addrLo;
356         union TCP_WATCH_CNTL_BITS cntl;
357         struct kfd_mem_obj *mem_obj;
358         unsigned int aw_reg_add_dword;
359         uint32_t *packet_buff_uint;
360         unsigned int i;
361         int status;
362         size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
363         /* we do not control the vmid in DIQ mode, just a place holder */
364         unsigned int vmid = 0;
365
366         BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
367
368         addrHi.u32All = 0;
369         addrLo.u32All = 0;
370         cntl.u32All = 0;
371
372         if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
373                         (adw_info->num_watch_points == 0)) {
374                 pr_err("amdkfd: num_watch_points is invalid\n");
375                 return -EINVAL;
376         }
377
378         if ((NULL == adw_info->watch_mode) ||
379                         (NULL == adw_info->watch_address)) {
380                 pr_err("amdkfd: adw_info fields are not valid\n");
381                 return -EINVAL;
382         }
383
384         status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
385
386         if (status != 0) {
387                 pr_err("amdkfd: Failed to allocate GART memory\n");
388                 return status;
389         }
390
391         packet_buff_uint = mem_obj->cpu_ptr;
392
393         memset(packet_buff_uint, 0, ib_size);
394
395         packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
396
397         packets_vec[0].header.count = 1;
398         packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
399         packets_vec[0].header.type = PM4_TYPE_3;
400         packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
401         packets_vec[0].bitfields2.insert_vmid = 1;
402         packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
403         packets_vec[1].bitfields2.insert_vmid = 0;
404         packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
405         packets_vec[2].bitfields2.insert_vmid = 0;
406         packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
407         packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
408         packets_vec[3].bitfields2.insert_vmid = 1;
409
410         for (i = 0; i < adw_info->num_watch_points; i++) {
411                 dbgdev_address_watch_set_registers(adw_info,
412                                                 &addrHi,
413                                                 &addrLo,
414                                                 &cntl,
415                                                 i,
416                                                 vmid);
417
418                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
419                 pr_debug("\t\t%20s %08x\n", "register index :", i);
420                 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
421                 pr_debug("\t\t%20s %p\n", "Add ptr is :",
422                                 adw_info->watch_address);
423                 pr_debug("\t\t%20s %08llx\n", "Add     is :",
424                                 adw_info->watch_address[i]);
425                 pr_debug("\t\t%20s %08x\n", "Address Low is :",
426                                 addrLo.bitfields.addr);
427                 pr_debug("\t\t%20s %08x\n", "Address high is :",
428                                 addrHi.bitfields.addr);
429                 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
430                                 cntl.bitfields.mask);
431                 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
432                                 cntl.bitfields.mode);
433                 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
434                                 cntl.bitfields.vmid);
435                 pr_debug("\t\t%20s %08x\n", "Control atc  is :",
436                                 cntl.bitfields.atc);
437                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
438
439                 aw_reg_add_dword =
440                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
441                                         dbgdev->dev->kgd,
442                                         i,
443                                         ADDRESS_WATCH_REG_CNTL);
444
445                 aw_reg_add_dword /= sizeof(uint32_t);
446
447                 packets_vec[0].bitfields2.reg_offset =
448                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
449
450                 packets_vec[0].reg_data[0] = cntl.u32All;
451
452                 aw_reg_add_dword =
453                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
454                                         dbgdev->dev->kgd,
455                                         i,
456                                         ADDRESS_WATCH_REG_ADDR_HI);
457
458                 aw_reg_add_dword /= sizeof(uint32_t);
459
460                 packets_vec[1].bitfields2.reg_offset =
461                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
462                 packets_vec[1].reg_data[0] = addrHi.u32All;
463
464                 aw_reg_add_dword =
465                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
466                                         dbgdev->dev->kgd,
467                                         i,
468                                         ADDRESS_WATCH_REG_ADDR_LO);
469
470                 aw_reg_add_dword /= sizeof(uint32_t);
471
472                 packets_vec[2].bitfields2.reg_offset =
473                                 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
474                 packets_vec[2].reg_data[0] = addrLo.u32All;
475
476                 /* enable watch flag if address is not zero*/
477                 if (adw_info->watch_address[i] > 0)
478                         cntl.bitfields.valid = 1;
479                 else
480                         cntl.bitfields.valid = 0;
481
482                 aw_reg_add_dword =
483                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
484                                         dbgdev->dev->kgd,
485                                         i,
486                                         ADDRESS_WATCH_REG_CNTL);
487
488                 aw_reg_add_dword /= sizeof(uint32_t);
489
490                 packets_vec[3].bitfields2.reg_offset =
491                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
492                 packets_vec[3].reg_data[0] = cntl.u32All;
493
494                 status = dbgdev_diq_submit_ib(
495                                         dbgdev,
496                                         adw_info->process->pasid,
497                                         mem_obj->gpu_addr,
498                                         packet_buff_uint,
499                                         ib_size);
500
501                 if (status != 0) {
502                         pr_err("amdkfd: Failed to submit IB to DIQ\n");
503                         break;
504                 }
505         }
506
507         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
508         return status;
509 }
510
511 static int dbgdev_wave_control_set_registers(
512                                 struct dbg_wave_control_info *wac_info,
513                                 union SQ_CMD_BITS *in_reg_sq_cmd,
514                                 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
515 {
516         int status;
517         union SQ_CMD_BITS reg_sq_cmd;
518         union GRBM_GFX_INDEX_BITS reg_gfx_index;
519         struct HsaDbgWaveMsgAMDGen2 *pMsg;
520
521         BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
522
523         reg_sq_cmd.u32All = 0;
524         reg_gfx_index.u32All = 0;
525         pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
526
527         switch (wac_info->mode) {
528         /* Send command to single wave */
529         case HSA_DBG_WAVEMODE_SINGLE:
530                 /*
531                  * Limit access to the process waves only,
532                  * by setting vmid check
533                  */
534                 reg_sq_cmd.bits.check_vmid = 1;
535                 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
536                 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
537                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
538
539                 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
540                 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
541                 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
542
543                 break;
544
545         /* Send command to all waves with matching VMID */
546         case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
547
548                 reg_gfx_index.bits.sh_broadcast_writes = 1;
549                 reg_gfx_index.bits.se_broadcast_writes = 1;
550                 reg_gfx_index.bits.instance_broadcast_writes = 1;
551
552                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
553
554                 break;
555
556         /* Send command to all CU waves with matching VMID */
557         case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
558
559                 reg_sq_cmd.bits.check_vmid = 1;
560                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
561
562                 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
563                 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
564                 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
565
566                 break;
567
568         default:
569                 return -EINVAL;
570         }
571
572         switch (wac_info->operand) {
573         case HSA_DBG_WAVEOP_HALT:
574                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
575                 break;
576
577         case HSA_DBG_WAVEOP_RESUME:
578                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
579                 break;
580
581         case HSA_DBG_WAVEOP_KILL:
582                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
583                 break;
584
585         case HSA_DBG_WAVEOP_DEBUG:
586                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
587                 break;
588
589         case HSA_DBG_WAVEOP_TRAP:
590                 if (wac_info->trapId < MAX_TRAPID) {
591                         reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
592                         reg_sq_cmd.bits.trap_id = wac_info->trapId;
593                 } else {
594                         status = -EINVAL;
595                 }
596                 break;
597
598         default:
599                 status = -EINVAL;
600                 break;
601         }
602
603         if (status == 0) {
604                 *in_reg_sq_cmd = reg_sq_cmd;
605                 *in_reg_gfx_index = reg_gfx_index;
606         }
607
608         return status;
609 }
610
611 static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
612                                         struct dbg_wave_control_info *wac_info)
613 {
614
615         int status;
616         union SQ_CMD_BITS reg_sq_cmd;
617         union GRBM_GFX_INDEX_BITS reg_gfx_index;
618         struct kfd_mem_obj *mem_obj;
619         uint32_t *packet_buff_uint;
620         struct pm4__set_config_reg *packets_vec;
621         size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
622
623         BUG_ON(!dbgdev || !wac_info);
624
625         reg_sq_cmd.u32All = 0;
626
627         status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
628                                                         &reg_gfx_index);
629         if (status) {
630                 pr_err("amdkfd: Failed to set wave control registers\n");
631                 return status;
632         }
633
634         /* we do not control the VMID in DIQ,so reset it to a known value */
635         reg_sq_cmd.bits.vm_id = 0;
636
637         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
638
639         pr_debug("\t\t mode      is: %u\n", wac_info->mode);
640         pr_debug("\t\t operand   is: %u\n", wac_info->operand);
641         pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
642         pr_debug("\t\t msg value is: %u\n",
643                         wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
644         pr_debug("\t\t vmid      is: N/A\n");
645
646         pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
647         pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
648         pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
649         pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
650         pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
651         pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
652         pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
653
654         pr_debug("\t\t ibw       is : %u\n",
655                         reg_gfx_index.bitfields.instance_broadcast_writes);
656         pr_debug("\t\t ii        is : %u\n",
657                         reg_gfx_index.bitfields.instance_index);
658         pr_debug("\t\t sebw      is : %u\n",
659                         reg_gfx_index.bitfields.se_broadcast_writes);
660         pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
661         pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
662         pr_debug("\t\t sbw       is : %u\n",
663                         reg_gfx_index.bitfields.sh_broadcast_writes);
664
665         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
666
667         status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
668
669         if (status != 0) {
670                 pr_err("amdkfd: Failed to allocate GART memory\n");
671                 return status;
672         }
673
674         packet_buff_uint = mem_obj->cpu_ptr;
675
676         memset(packet_buff_uint, 0, ib_size);
677
678         packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
679         packets_vec[0].header.count = 1;
680         packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
681         packets_vec[0].header.type = PM4_TYPE_3;
682         packets_vec[0].bitfields2.reg_offset =
683                         GRBM_GFX_INDEX / (sizeof(uint32_t)) -
684                                 USERCONFIG_REG_BASE;
685
686         packets_vec[0].bitfields2.insert_vmid = 0;
687         packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
688
689         packets_vec[1].header.count = 1;
690         packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
691         packets_vec[1].header.type = PM4_TYPE_3;
692         packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
693                                                 AMD_CONFIG_REG_BASE;
694
695         packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
696         packets_vec[1].bitfields2.insert_vmid = 1;
697         packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
698
699         /* Restore the GRBM_GFX_INDEX register */
700
701         reg_gfx_index.u32All = 0;
702         reg_gfx_index.bits.sh_broadcast_writes = 1;
703         reg_gfx_index.bits.instance_broadcast_writes = 1;
704         reg_gfx_index.bits.se_broadcast_writes = 1;
705
706
707         packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
708         packets_vec[2].bitfields2.reg_offset =
709                                 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
710                                         USERCONFIG_REG_BASE;
711
712         packets_vec[2].bitfields2.insert_vmid = 0;
713         packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
714
715         status = dbgdev_diq_submit_ib(
716                         dbgdev,
717                         wac_info->process->pasid,
718                         mem_obj->gpu_addr,
719                         packet_buff_uint,
720                         ib_size);
721
722         if (status != 0)
723                 pr_err("amdkfd: Failed to submit IB to DIQ\n");
724
725         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
726
727         return status;
728 }
729
730 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
731                                         struct dbg_wave_control_info *wac_info)
732 {
733         int status;
734         union SQ_CMD_BITS reg_sq_cmd;
735         union GRBM_GFX_INDEX_BITS reg_gfx_index;
736         struct kfd_process_device *pdd;
737
738         BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
739
740         reg_sq_cmd.u32All = 0;
741
742         /* taking the VMID for that process on the safe way using PDD */
743         pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
744
745         if (!pdd) {
746                 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
747                 return -EFAULT;
748         }
749         status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
750                                                         &reg_gfx_index);
751         if (status) {
752                 pr_err("amdkfd: Failed to set wave control registers\n");
753                 return status;
754         }
755
756         /* for non DIQ we need to patch the VMID: */
757
758         reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
759
760         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
761
762         pr_debug("\t\t mode      is: %u\n", wac_info->mode);
763         pr_debug("\t\t operand   is: %u\n", wac_info->operand);
764         pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
765         pr_debug("\t\t msg value is: %u\n",
766                         wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
767         pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
768
769         pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
770         pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
771         pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
772         pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
773         pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
774         pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
775         pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
776
777         pr_debug("\t\t ibw       is : %u\n",
778                         reg_gfx_index.bitfields.instance_broadcast_writes);
779         pr_debug("\t\t ii        is : %u\n",
780                         reg_gfx_index.bitfields.instance_index);
781         pr_debug("\t\t sebw      is : %u\n",
782                         reg_gfx_index.bitfields.se_broadcast_writes);
783         pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
784         pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
785         pr_debug("\t\t sbw       is : %u\n",
786                         reg_gfx_index.bitfields.sh_broadcast_writes);
787
788         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
789
790         return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
791                                                         reg_gfx_index.u32All,
792                                                         reg_sq_cmd.u32All);
793 }
794
795 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
796 {
797         int status = 0;
798         unsigned int vmid;
799         union SQ_CMD_BITS reg_sq_cmd;
800         union GRBM_GFX_INDEX_BITS reg_gfx_index;
801         struct kfd_process_device *pdd;
802         struct dbg_wave_control_info wac_info;
803         int temp;
804         int first_vmid_to_scan = 8;
805         int last_vmid_to_scan = 15;
806
807         first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
808         temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
809         last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
810
811         reg_sq_cmd.u32All = 0;
812         status = 0;
813
814         wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
815         wac_info.operand = HSA_DBG_WAVEOP_KILL;
816
817         pr_debug("Killing all process wavefronts\n");
818
819         /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
820          * ATC_VMID15_PASID_MAPPING
821          * to check which VMID the current process is mapped to. */
822
823         for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
824                 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
825                                 (dev->kgd, vmid)) {
826                         if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
827                                         (dev->kgd, vmid) == p->pasid) {
828                                 pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
829                                                 vmid, p->pasid);
830                                 break;
831                         }
832                 }
833         }
834
835         if (vmid > last_vmid_to_scan) {
836                 pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
837                 return -EFAULT;
838         }
839
840         /* taking the VMID for that process on the safe way using PDD */
841         pdd = kfd_get_process_device_data(dev, p);
842         if (!pdd)
843                 return -EFAULT;
844
845         status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
846                         &reg_gfx_index);
847         if (status != 0)
848                 return -EINVAL;
849
850         /* for non DIQ we need to patch the VMID: */
851         reg_sq_cmd.bits.vm_id = vmid;
852
853         dev->kfd2kgd->wave_control_execute(dev->kgd,
854                                         reg_gfx_index.u32All,
855                                         reg_sq_cmd.u32All);
856
857         return 0;
858 }
859
860 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
861                         enum DBGDEV_TYPE type)
862 {
863         BUG_ON(!pdbgdev || !pdev);
864
865         pdbgdev->dev = pdev;
866         pdbgdev->kq = NULL;
867         pdbgdev->type = type;
868         pdbgdev->pqm = NULL;
869
870         switch (type) {
871         case DBGDEV_TYPE_NODIQ:
872                 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
873                 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
874                 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
875                 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
876                 break;
877         case DBGDEV_TYPE_DIQ:
878         default:
879                 pdbgdev->dbgdev_register = dbgdev_register_diq;
880                 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
881                 pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
882                 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
883                 break;
884         }
885
886 }