These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] / kernel / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142                                           bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
176
177 /*
178  * Indirect registers accessor
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182         unsigned long flags;
183         u32 r;
184
185         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186         WREG32(CIK_DIDT_IND_INDEX, (reg));
187         r = RREG32(CIK_DIDT_IND_DATA);
188         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189         return r;
190 }
191
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194         unsigned long flags;
195
196         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197         WREG32(CIK_DIDT_IND_INDEX, (reg));
198         WREG32(CIK_DIDT_IND_DATA, (v));
199         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205         u32 temp;
206         int actual_temp = 0;
207
208         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209                 CTF_TEMP_SHIFT;
210
211         if (temp & 0x200)
212                 actual_temp = 255;
213         else
214                 actual_temp = temp & 0x1ff;
215
216         actual_temp = actual_temp * 1000;
217
218         return actual_temp;
219 }
220
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224         u32 temp;
225         int actual_temp = 0;
226
227         temp = RREG32_SMC(0xC0300E0C);
228
229         if (temp)
230                 actual_temp = (temp / 8) - 49;
231         else
232                 actual_temp = 0;
233
234         actual_temp = actual_temp * 1000;
235
236         return actual_temp;
237 }
238
239 /*
240  * Indirect registers accessor
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244         unsigned long flags;
245         u32 r;
246
247         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248         WREG32(PCIE_INDEX, reg);
249         (void)RREG32(PCIE_INDEX);
250         r = RREG32(PCIE_DATA);
251         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252         return r;
253 }
254
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257         unsigned long flags;
258
259         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260         WREG32(PCIE_INDEX, reg);
261         (void)RREG32(PCIE_INDEX);
262         WREG32(PCIE_DATA, v);
263         (void)RREG32(PCIE_DATA);
264         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266
/*
 * RLC save/restore register list for Spectre (Kaveri) parts, consumed by
 * the RLC ucode setup.
 *
 * NOTE(review): entries appear to come in pairs of
 *   ((instance/broadcast selector) << 16) | (register dword offset)
 * followed by a placeholder value of 0; the bare 0x3 and 0x5 words look
 * like section/count markers, after which the pair format changes
 * (the final five entries carry no placeholder value). Confirm against
 * the RLC ucode format documentation before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
713
714 static const u32 kalindi_rlc_save_restore_register_list[] =
715 {
716         (0x0e00 << 16) | (0xc12c >> 2),
717         0x00000000,
718         (0x0e00 << 16) | (0xc140 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc150 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc15c >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc168 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc170 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc204 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc2b4 >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc2b8 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc2bc >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc2c0 >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0x8228 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0x829c >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0x869c >> 2),
743         0x00000000,
744         (0x0600 << 16) | (0x98f4 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0x98f8 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0x9900 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc260 >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x90e8 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x3c000 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x3c00c >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x8c1c >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x9700 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xcd20 >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xcd20 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xcd20 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xcd20 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xcd20 >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0x89bc >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0x8900 >> 2),
775         0x00000000,
776         0x3,
777         (0x0e00 << 16) | (0xc130 >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0xc134 >> 2),
780         0x00000000,
781         (0x0e00 << 16) | (0xc1fc >> 2),
782         0x00000000,
783         (0x0e00 << 16) | (0xc208 >> 2),
784         0x00000000,
785         (0x0e00 << 16) | (0xc264 >> 2),
786         0x00000000,
787         (0x0e00 << 16) | (0xc268 >> 2),
788         0x00000000,
789         (0x0e00 << 16) | (0xc26c >> 2),
790         0x00000000,
791         (0x0e00 << 16) | (0xc270 >> 2),
792         0x00000000,
793         (0x0e00 << 16) | (0xc274 >> 2),
794         0x00000000,
795         (0x0e00 << 16) | (0xc28c >> 2),
796         0x00000000,
797         (0x0e00 << 16) | (0xc290 >> 2),
798         0x00000000,
799         (0x0e00 << 16) | (0xc294 >> 2),
800         0x00000000,
801         (0x0e00 << 16) | (0xc298 >> 2),
802         0x00000000,
803         (0x0e00 << 16) | (0xc2a0 >> 2),
804         0x00000000,
805         (0x0e00 << 16) | (0xc2a4 >> 2),
806         0x00000000,
807         (0x0e00 << 16) | (0xc2a8 >> 2),
808         0x00000000,
809         (0x0e00 << 16) | (0xc2ac >> 2),
810         0x00000000,
811         (0x0e00 << 16) | (0x301d0 >> 2),
812         0x00000000,
813         (0x0e00 << 16) | (0x30238 >> 2),
814         0x00000000,
815         (0x0e00 << 16) | (0x30250 >> 2),
816         0x00000000,
817         (0x0e00 << 16) | (0x30254 >> 2),
818         0x00000000,
819         (0x0e00 << 16) | (0x30258 >> 2),
820         0x00000000,
821         (0x0e00 << 16) | (0x3025c >> 2),
822         0x00000000,
823         (0x4e00 << 16) | (0xc900 >> 2),
824         0x00000000,
825         (0x5e00 << 16) | (0xc900 >> 2),
826         0x00000000,
827         (0x6e00 << 16) | (0xc900 >> 2),
828         0x00000000,
829         (0x7e00 << 16) | (0xc900 >> 2),
830         0x00000000,
831         (0x4e00 << 16) | (0xc904 >> 2),
832         0x00000000,
833         (0x5e00 << 16) | (0xc904 >> 2),
834         0x00000000,
835         (0x6e00 << 16) | (0xc904 >> 2),
836         0x00000000,
837         (0x7e00 << 16) | (0xc904 >> 2),
838         0x00000000,
839         (0x4e00 << 16) | (0xc908 >> 2),
840         0x00000000,
841         (0x5e00 << 16) | (0xc908 >> 2),
842         0x00000000,
843         (0x6e00 << 16) | (0xc908 >> 2),
844         0x00000000,
845         (0x7e00 << 16) | (0xc908 >> 2),
846         0x00000000,
847         (0x4e00 << 16) | (0xc90c >> 2),
848         0x00000000,
849         (0x5e00 << 16) | (0xc90c >> 2),
850         0x00000000,
851         (0x6e00 << 16) | (0xc90c >> 2),
852         0x00000000,
853         (0x7e00 << 16) | (0xc90c >> 2),
854         0x00000000,
855         (0x4e00 << 16) | (0xc910 >> 2),
856         0x00000000,
857         (0x5e00 << 16) | (0xc910 >> 2),
858         0x00000000,
859         (0x6e00 << 16) | (0xc910 >> 2),
860         0x00000000,
861         (0x7e00 << 16) | (0xc910 >> 2),
862         0x00000000,
863         (0x0e00 << 16) | (0xc99c >> 2),
864         0x00000000,
865         (0x0e00 << 16) | (0x9834 >> 2),
866         0x00000000,
867         (0x0000 << 16) | (0x30f00 >> 2),
868         0x00000000,
869         (0x0000 << 16) | (0x30f04 >> 2),
870         0x00000000,
871         (0x0000 << 16) | (0x30f08 >> 2),
872         0x00000000,
873         (0x0000 << 16) | (0x30f0c >> 2),
874         0x00000000,
875         (0x0600 << 16) | (0x9b7c >> 2),
876         0x00000000,
877         (0x0e00 << 16) | (0x8a14 >> 2),
878         0x00000000,
879         (0x0e00 << 16) | (0x8a18 >> 2),
880         0x00000000,
881         (0x0600 << 16) | (0x30a00 >> 2),
882         0x00000000,
883         (0x0e00 << 16) | (0x8bf0 >> 2),
884         0x00000000,
885         (0x0e00 << 16) | (0x8bcc >> 2),
886         0x00000000,
887         (0x0e00 << 16) | (0x8b24 >> 2),
888         0x00000000,
889         (0x0e00 << 16) | (0x30a04 >> 2),
890         0x00000000,
891         (0x0600 << 16) | (0x30a10 >> 2),
892         0x00000000,
893         (0x0600 << 16) | (0x30a14 >> 2),
894         0x00000000,
895         (0x0600 << 16) | (0x30a18 >> 2),
896         0x00000000,
897         (0x0600 << 16) | (0x30a2c >> 2),
898         0x00000000,
899         (0x0e00 << 16) | (0xc700 >> 2),
900         0x00000000,
901         (0x0e00 << 16) | (0xc704 >> 2),
902         0x00000000,
903         (0x0e00 << 16) | (0xc708 >> 2),
904         0x00000000,
905         (0x0e00 << 16) | (0xc768 >> 2),
906         0x00000000,
907         (0x0400 << 16) | (0xc770 >> 2),
908         0x00000000,
909         (0x0400 << 16) | (0xc774 >> 2),
910         0x00000000,
911         (0x0400 << 16) | (0xc798 >> 2),
912         0x00000000,
913         (0x0400 << 16) | (0xc79c >> 2),
914         0x00000000,
915         (0x0e00 << 16) | (0x9100 >> 2),
916         0x00000000,
917         (0x0e00 << 16) | (0x3c010 >> 2),
918         0x00000000,
919         (0x0e00 << 16) | (0x8c00 >> 2),
920         0x00000000,
921         (0x0e00 << 16) | (0x8c04 >> 2),
922         0x00000000,
923         (0x0e00 << 16) | (0x8c20 >> 2),
924         0x00000000,
925         (0x0e00 << 16) | (0x8c38 >> 2),
926         0x00000000,
927         (0x0e00 << 16) | (0x8c3c >> 2),
928         0x00000000,
929         (0x0e00 << 16) | (0xae00 >> 2),
930         0x00000000,
931         (0x0e00 << 16) | (0x9604 >> 2),
932         0x00000000,
933         (0x0e00 << 16) | (0xac08 >> 2),
934         0x00000000,
935         (0x0e00 << 16) | (0xac0c >> 2),
936         0x00000000,
937         (0x0e00 << 16) | (0xac10 >> 2),
938         0x00000000,
939         (0x0e00 << 16) | (0xac14 >> 2),
940         0x00000000,
941         (0x0e00 << 16) | (0xac58 >> 2),
942         0x00000000,
943         (0x0e00 << 16) | (0xac68 >> 2),
944         0x00000000,
945         (0x0e00 << 16) | (0xac6c >> 2),
946         0x00000000,
947         (0x0e00 << 16) | (0xac70 >> 2),
948         0x00000000,
949         (0x0e00 << 16) | (0xac74 >> 2),
950         0x00000000,
951         (0x0e00 << 16) | (0xac78 >> 2),
952         0x00000000,
953         (0x0e00 << 16) | (0xac7c >> 2),
954         0x00000000,
955         (0x0e00 << 16) | (0xac80 >> 2),
956         0x00000000,
957         (0x0e00 << 16) | (0xac84 >> 2),
958         0x00000000,
959         (0x0e00 << 16) | (0xac88 >> 2),
960         0x00000000,
961         (0x0e00 << 16) | (0xac8c >> 2),
962         0x00000000,
963         (0x0e00 << 16) | (0x970c >> 2),
964         0x00000000,
965         (0x0e00 << 16) | (0x9714 >> 2),
966         0x00000000,
967         (0x0e00 << 16) | (0x9718 >> 2),
968         0x00000000,
969         (0x0e00 << 16) | (0x971c >> 2),
970         0x00000000,
971         (0x0e00 << 16) | (0x31068 >> 2),
972         0x00000000,
973         (0x4e00 << 16) | (0x31068 >> 2),
974         0x00000000,
975         (0x5e00 << 16) | (0x31068 >> 2),
976         0x00000000,
977         (0x6e00 << 16) | (0x31068 >> 2),
978         0x00000000,
979         (0x7e00 << 16) | (0x31068 >> 2),
980         0x00000000,
981         (0x0e00 << 16) | (0xcd10 >> 2),
982         0x00000000,
983         (0x0e00 << 16) | (0xcd14 >> 2),
984         0x00000000,
985         (0x0e00 << 16) | (0x88b0 >> 2),
986         0x00000000,
987         (0x0e00 << 16) | (0x88b4 >> 2),
988         0x00000000,
989         (0x0e00 << 16) | (0x88b8 >> 2),
990         0x00000000,
991         (0x0e00 << 16) | (0x88bc >> 2),
992         0x00000000,
993         (0x0400 << 16) | (0x89c0 >> 2),
994         0x00000000,
995         (0x0e00 << 16) | (0x88c4 >> 2),
996         0x00000000,
997         (0x0e00 << 16) | (0x88c8 >> 2),
998         0x00000000,
999         (0x0e00 << 16) | (0x88d0 >> 2),
1000         0x00000000,
1001         (0x0e00 << 16) | (0x88d4 >> 2),
1002         0x00000000,
1003         (0x0e00 << 16) | (0x88d8 >> 2),
1004         0x00000000,
1005         (0x0e00 << 16) | (0x8980 >> 2),
1006         0x00000000,
1007         (0x0e00 << 16) | (0x30938 >> 2),
1008         0x00000000,
1009         (0x0e00 << 16) | (0x3093c >> 2),
1010         0x00000000,
1011         (0x0e00 << 16) | (0x30940 >> 2),
1012         0x00000000,
1013         (0x0e00 << 16) | (0x89a0 >> 2),
1014         0x00000000,
1015         (0x0e00 << 16) | (0x30900 >> 2),
1016         0x00000000,
1017         (0x0e00 << 16) | (0x30904 >> 2),
1018         0x00000000,
1019         (0x0e00 << 16) | (0x89b4 >> 2),
1020         0x00000000,
1021         (0x0e00 << 16) | (0x3e1fc >> 2),
1022         0x00000000,
1023         (0x0e00 << 16) | (0x3c210 >> 2),
1024         0x00000000,
1025         (0x0e00 << 16) | (0x3c214 >> 2),
1026         0x00000000,
1027         (0x0e00 << 16) | (0x3c218 >> 2),
1028         0x00000000,
1029         (0x0e00 << 16) | (0x8904 >> 2),
1030         0x00000000,
1031         0x5,
1032         (0x0e00 << 16) | (0x8c28 >> 2),
1033         (0x0e00 << 16) | (0x8c2c >> 2),
1034         (0x0e00 << 16) | (0x8c30 >> 2),
1035         (0x0e00 << 16) | (0x8c34 >> 2),
1036         (0x0e00 << 16) | (0x9600 >> 2),
1037 };
1038
/* Bonaire SPM golden settings; programmed last for CHIP_BONAIRE in
 * cik_init_golden_registers() via radeon_program_register_sequence().
 * Entries appear to be {reg offset, mask, value} triples — confirm against
 * radeon_program_register_sequence().
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1043
/* Bonaire common golden settings; applied for CHIP_BONAIRE in
 * cik_init_golden_registers() after bonaire_golden_registers.
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1051
/* Bonaire ASIC-specific golden register settings; applied for CHIP_BONAIRE in
 * cik_init_golden_registers() via radeon_program_register_sequence().
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1096
/* Bonaire clock-gating init sequence (mgcg/cgcg presumably = medium/coarse
 * grain clock gating — name-derived, confirm against CIK docs); programmed
 * first for CHIP_BONAIRE in cik_init_golden_registers().
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1182
/* Spectre (Kaveri) SPM golden settings; programmed last for CHIP_KAVERI in
 * cik_init_golden_registers(). Entries appear to be {reg offset, mask, value}
 * triples.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1187
/* Spectre (Kaveri) common golden settings; same values as the Bonaire and
 * Kalindi common tables. Entries appear to be {reg offset, mask, value}
 * triples.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1195
/* Spectre (Kaveri) ASIC-specific golden register settings; applied for
 * CHIP_KAVERI in cik_init_golden_registers().
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1224
/* Spectre (Kaveri) clock-gating init sequence; programmed first for
 * CHIP_KAVERI in cik_init_golden_registers(). Differs from the Bonaire table
 * mainly in the 0x3c2c0/0x3c2c8/0x3c2c4 values and the extra 0x3c0ac-0x3c0bc
 * group. Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1315
/* Kalindi SPM golden settings; programmed last for both CHIP_KABINI and
 * CHIP_MULLINS in cik_init_golden_registers(). Entries appear to be
 * {reg offset, mask, value} triples.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1320
/* Kalindi common golden settings; shared by CHIP_KABINI and CHIP_MULLINS in
 * cik_init_golden_registers(). Entries appear to be {reg offset, mask, value}
 * triples.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1328
/* Kalindi (Kabini) ASIC-specific golden register settings; applied for
 * CHIP_KABINI in cik_init_golden_registers().
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1362
/* Kalindi clock-gating init sequence; programmed first for both CHIP_KABINI
 * and CHIP_MULLINS in cik_init_golden_registers(). Shorter than the
 * Bonaire/Spectre tables (fewer 0x3c0xx groups, no 0xf90/0xf98 entries).
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1421
/* Hawaii SPM golden settings; same single entry as the other ASICs' SPM
 * tables. Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1426
/* Hawaii common golden settings; unlike the other ASICs' common tables these
 * touch 0x30800/0x28350/0x28354/0x9a10/0x98f8 rather than the 0xc77x/0xc79x
 * group. Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1435
/* Hawaii ASIC-specific golden register settings; applied for CHIP_HAWAII in
 * cik_init_golden_registers().
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1475
/* Hawaii clock-gating init sequence; programmed first for CHIP_HAWAII in
 * cik_init_golden_registers(). Largest of the per-ASIC tables: extends the
 * 0x3c0xx groups up to 0x3c0f8 and adds the 0xc318..0x2f50 entries.
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1586
/* Godavari (Mullins) ASIC-specific golden register settings; applied for
 * CHIP_MULLINS in cik_init_golden_registers().
 * Entries appear to be {reg offset, mask, value} triples.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is wider than every neighbouring offset and the
	 * sibling kalindi table uses 0x9834 with the identical mask/value --
	 * looks like a typo for 0x9834; confirm against the CIK register spec
	 * before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1622
1623
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627         mutex_lock(&rdev->grbm_idx_mutex);
1628         switch (rdev->family) {
1629         case CHIP_BONAIRE:
1630                 radeon_program_register_sequence(rdev,
1631                                                  bonaire_mgcg_cgcg_init,
1632                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633                 radeon_program_register_sequence(rdev,
1634                                                  bonaire_golden_registers,
1635                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  bonaire_golden_common_registers,
1638                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  bonaire_golden_spm_registers,
1641                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642                 break;
1643         case CHIP_KABINI:
1644                 radeon_program_register_sequence(rdev,
1645                                                  kalindi_mgcg_cgcg_init,
1646                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647                 radeon_program_register_sequence(rdev,
1648                                                  kalindi_golden_registers,
1649                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650                 radeon_program_register_sequence(rdev,
1651                                                  kalindi_golden_common_registers,
1652                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  kalindi_golden_spm_registers,
1655                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656                 break;
1657         case CHIP_MULLINS:
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_mgcg_cgcg_init,
1660                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661                 radeon_program_register_sequence(rdev,
1662                                                  godavari_golden_registers,
1663                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1664                 radeon_program_register_sequence(rdev,
1665                                                  kalindi_golden_common_registers,
1666                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  kalindi_golden_spm_registers,
1669                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670                 break;
1671         case CHIP_KAVERI:
1672                 radeon_program_register_sequence(rdev,
1673                                                  spectre_mgcg_cgcg_init,
1674                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675                 radeon_program_register_sequence(rdev,
1676                                                  spectre_golden_registers,
1677                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1678                 radeon_program_register_sequence(rdev,
1679                                                  spectre_golden_common_registers,
1680                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681                 radeon_program_register_sequence(rdev,
1682                                                  spectre_golden_spm_registers,
1683                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684                 break;
1685         case CHIP_HAWAII:
1686                 radeon_program_register_sequence(rdev,
1687                                                  hawaii_mgcg_cgcg_init,
1688                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689                 radeon_program_register_sequence(rdev,
1690                                                  hawaii_golden_registers,
1691                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692                 radeon_program_register_sequence(rdev,
1693                                                  hawaii_golden_common_registers,
1694                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695                 radeon_program_register_sequence(rdev,
1696                                                  hawaii_golden_spm_registers,
1697                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698                 break;
1699         default:
1700                 break;
1701         }
1702         mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716
1717         if (rdev->flags & RADEON_IS_IGP) {
1718                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719                         return reference_clock / 2;
1720         } else {
1721                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722                         return reference_clock / 4;
1723         }
1724         return reference_clock;
1725 }
1726
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738         if (index < rdev->doorbell.num_doorbells) {
1739                 return readl(rdev->doorbell.ptr + index);
1740         } else {
1741                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742                 return 0;
1743         }
1744 }
1745
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758         if (index < rdev->doorbell.num_doorbells) {
1759                 writel(v, rdev->doorbell.ptr + index);
1760         } else {
1761                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762         }
1763 }
1764
#define BONAIRE_IO_MC_REGS_SIZE 36

/* {MC_SEQ_IO_DEBUG index, data} pairs written to the memory controller
 * by ci_mc_load_microcode() before loading legacy (headerless) MC ucode
 * on Bonaire.  New-style firmware carries its own io debug list instead. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1806
#define HAWAII_IO_MC_REGS_SIZE 22

/* {MC_SEQ_IO_DEBUG index, data} pairs written to the memory controller
 * by ci_mc_load_microcode() before loading legacy (headerless) MC ucode
 * on Hawaii.  New-style firmware carries its own io debug list instead. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1834
1835
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active registers instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850                             u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853                              MEID(me & 0x3) |
1854                              VMID(vmid & 0xf) |
1855                              QUEUEID(queue & 0x7));
1856         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
1858
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870         const __be32 *fw_data = NULL;
1871         const __le32 *new_fw_data = NULL;
1872         u32 running, blackout = 0, tmp;
1873         u32 *io_mc_regs = NULL;
1874         const __le32 *new_io_mc_regs = NULL;
1875         int i, regs_size, ucode_size;
1876
1877         if (!rdev->mc_fw)
1878                 return -EINVAL;
1879
1880         if (rdev->new_fw) {
1881                 const struct mc_firmware_header_v1_0 *hdr =
1882                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
1884                 radeon_ucode_print_mc_hdr(&hdr->header);
1885
1886                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887                 new_io_mc_regs = (const __le32 *)
1888                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890                 new_fw_data = (const __le32 *)
1891                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892         } else {
1893                 ucode_size = rdev->mc_fw->size / 4;
1894
1895                 switch (rdev->family) {
1896                 case CHIP_BONAIRE:
1897                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899                         break;
1900                 case CHIP_HAWAII:
1901                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1903                         break;
1904                 default:
1905                         return -EINVAL;
1906                 }
1907                 fw_data = (const __be32 *)rdev->mc_fw->data;
1908         }
1909
1910         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
1912         if (running == 0) {
1913                 if (running) {
1914                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916                 }
1917
1918                 /* reset the engine and set to writable */
1919                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922                 /* load mc io regs */
1923                 for (i = 0; i < regs_size; i++) {
1924                         if (rdev->new_fw) {
1925                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927                         } else {
1928                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930                         }
1931                 }
1932
1933                 tmp = RREG32(MC_SEQ_MISC0);
1934                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939                 }
1940
1941                 /* load the MC ucode */
1942                 for (i = 0; i < ucode_size; i++) {
1943                         if (rdev->new_fw)
1944                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945                         else
1946                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947                 }
1948
1949                 /* put the engine back into the active state */
1950                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954                 /* wait for training to complete */
1955                 for (i = 0; i < rdev->usec_timeout; i++) {
1956                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957                                 break;
1958                         udelay(1);
1959                 }
1960                 for (i = 0; i < rdev->usec_timeout; i++) {
1961                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962                                 break;
1963                         udelay(1);
1964                 }
1965
1966                 if (running)
1967                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968         }
1969
1970         return 0;
1971 }
1972
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 *
 * For each image the new-style lowercase name ("radeon/bonaire_pfp.bin")
 * is tried first; on failure the legacy uppercase name is requested and
 * its size checked against the per-family expected size.  New-style
 * images are validated with radeon_ucode_validate() and counted in
 * new_fw; at the end, either all images must be new-style or all legacy
 * (mixing is rejected with -EINVAL).
 *
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	/* expected byte sizes of the legacy (headerless) firmware images */
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;	/* how many images came in new-style (validated) form */
	int err;
	int num_fw;	/* total images expected for this family */

	DRM_DEBUG("\n");

	/* pick firmware names and legacy sizes per family */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		/* APU: no MC/SMC images; mec2 may add an 8th below */
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* PFP: new name first, fall back to legacy with size check */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME
	 * NOTE(review): unlike the pfp path, a bogus legacy length here sets
	 * err = -EINVAL without goto out; a later successful
	 * request_firmware() overwrites err -- confirm this is intended.
	 * The same pattern repeats for ce/mec/rlc/sdma below. */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC (compute) */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			printk(KERN_ERR
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC2: Kaveri only, new-style name only (no legacy fallback) */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* RLC */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			printk(KERN_ERR
			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SDMA */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			printk(KERN_ERR
			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			printk(KERN_ERR
			       "cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* MC: legacy fallback tries the mc2 image before mc, and
		 * accepts either expected size */
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				printk(KERN_ERR
				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				printk(KERN_ERR
				       "cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* SMC: a missing image is tolerated (err reset to 0) so the
		 * driver can run without power management firmware */
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				printk(KERN_ERR
				       "smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				printk(KERN_ERR
				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				printk(KERN_ERR
				       "cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* all images must be either new-style or legacy; reject a mix */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* on any failure, drop every image acquired so far */
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2329
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346         const u32 num_tile_mode_states = 32;
2347         const u32 num_secondary_tile_mode_states = 16;
2348         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349         u32 num_pipe_configs;
2350         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351                 rdev->config.cik.max_shader_engines;
2352
2353         switch (rdev->config.cik.mem_row_size_in_kb) {
2354         case 1:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356                 break;
2357         case 2:
2358         default:
2359                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360                 break;
2361         case 4:
2362                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363                 break;
2364         }
2365
2366         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367         if (num_pipe_configs > 8)
2368                 num_pipe_configs = 16;
2369
2370         if (num_pipe_configs == 16) {
2371                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372                         switch (reg_offset) {
2373                         case 0:
2374                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378                                 break;
2379                         case 1:
2380                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384                                 break;
2385                         case 2:
2386                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390                                 break;
2391                         case 3:
2392                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396                                 break;
2397                         case 4:
2398                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                                  TILE_SPLIT(split_equal_to_row_size));
2402                                 break;
2403                         case 5:
2404                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                                 break;
2408                         case 6:
2409                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413                                 break;
2414                         case 7:
2415                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                                  TILE_SPLIT(split_equal_to_row_size));
2419                                 break;
2420                         case 8:
2421                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423                                 break;
2424                         case 9:
2425                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428                                 break;
2429                         case 10:
2430                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                                 break;
2435                         case 11:
2436                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                                 break;
2441                         case 12:
2442                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                                 break;
2447                         case 13:
2448                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451                                 break;
2452                         case 14:
2453                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                                 break;
2458                         case 16:
2459                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                                 break;
2464                         case 17:
2465                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469                                 break;
2470                         case 27:
2471                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474                                 break;
2475                         case 28:
2476                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480                                 break;
2481                         case 29:
2482                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486                                 break;
2487                         case 30:
2488                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492                                 break;
2493                         default:
2494                                 gb_tile_moden = 0;
2495                                 break;
2496                         }
2497                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499                 }
2500                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501                         switch (reg_offset) {
2502                         case 0:
2503                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2507                                 break;
2508                         case 1:
2509                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2513                                 break;
2514                         case 2:
2515                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2519                                 break;
2520                         case 3:
2521                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2525                                 break;
2526                         case 4:
2527                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2531                                 break;
2532                         case 5:
2533                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2537                                 break;
2538                         case 6:
2539                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2543                                 break;
2544                         case 8:
2545                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2549                                 break;
2550                         case 9:
2551                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2555                                 break;
2556                         case 10:
2557                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2561                                 break;
2562                         case 11:
2563                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2567                                 break;
2568                         case 12:
2569                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2573                                 break;
2574                         case 13:
2575                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2579                                 break;
2580                         case 14:
2581                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2585                                 break;
2586                         default:
2587                                 gb_tile_moden = 0;
2588                                 break;
2589                         }
2590                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592                 }
2593         } else if (num_pipe_configs == 8) {
2594                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595                         switch (reg_offset) {
2596                         case 0:
2597                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601                                 break;
2602                         case 1:
2603                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607                                 break;
2608                         case 2:
2609                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613                                 break;
2614                         case 3:
2615                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619                                 break;
2620                         case 4:
2621                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                                  TILE_SPLIT(split_equal_to_row_size));
2625                                 break;
2626                         case 5:
2627                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630                                 break;
2631                         case 6:
2632                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636                                 break;
2637                         case 7:
2638                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641                                                  TILE_SPLIT(split_equal_to_row_size));
2642                                 break;
2643                         case 8:
2644                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646                                 break;
2647                         case 9:
2648                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651                                 break;
2652                         case 10:
2653                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                                 break;
2658                         case 11:
2659                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663                                 break;
2664                         case 12:
2665                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669                                 break;
2670                         case 13:
2671                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674                                 break;
2675                         case 14:
2676                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680                                 break;
2681                         case 16:
2682                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686                                 break;
2687                         case 17:
2688                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                                 break;
2693                         case 27:
2694                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697                                 break;
2698                         case 28:
2699                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                                 break;
2704                         case 29:
2705                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709                                 break;
2710                         case 30:
2711                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                                 break;
2716                         default:
2717                                 gb_tile_moden = 0;
2718                                 break;
2719                         }
2720                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722                 }
2723                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724                         switch (reg_offset) {
2725                         case 0:
2726                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2730                                 break;
2731                         case 1:
2732                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2736                                 break;
2737                         case 2:
2738                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2742                                 break;
2743                         case 3:
2744                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2748                                 break;
2749                         case 4:
2750                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2754                                 break;
2755                         case 5:
2756                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2760                                 break;
2761                         case 6:
2762                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2766                                 break;
2767                         case 8:
2768                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2772                                 break;
2773                         case 9:
2774                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2778                                 break;
2779                         case 10:
2780                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2784                                 break;
2785                         case 11:
2786                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2790                                 break;
2791                         case 12:
2792                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2796                                 break;
2797                         case 13:
2798                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2802                                 break;
2803                         case 14:
2804                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2808                                 break;
2809                         default:
2810                                 gb_tile_moden = 0;
2811                                 break;
2812                         }
2813                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815                 }
2816         } else if (num_pipe_configs == 4) {
2817                 if (num_rbs == 4) {
2818                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819                                 switch (reg_offset) {
2820                                 case 0:
2821                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825                                         break;
2826                                 case 1:
2827                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831                                         break;
2832                                 case 2:
2833                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837                                         break;
2838                                 case 3:
2839                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843                                         break;
2844                                 case 4:
2845                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                                          TILE_SPLIT(split_equal_to_row_size));
2849                                         break;
2850                                 case 5:
2851                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854                                         break;
2855                                 case 6:
2856                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860                                         break;
2861                                 case 7:
2862                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                                          TILE_SPLIT(split_equal_to_row_size));
2866                                         break;
2867                                 case 8:
2868                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870                                         break;
2871                                 case 9:
2872                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875                                         break;
2876                                 case 10:
2877                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881                                         break;
2882                                 case 11:
2883                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887                                         break;
2888                                 case 12:
2889                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893                                         break;
2894                                 case 13:
2895                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898                                         break;
2899                                 case 14:
2900                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904                                         break;
2905                                 case 16:
2906                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910                                         break;
2911                                 case 17:
2912                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916                                         break;
2917                                 case 27:
2918                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921                                         break;
2922                                 case 28:
2923                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927                                         break;
2928                                 case 29:
2929                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933                                         break;
2934                                 case 30:
2935                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                                         break;
2940                                 default:
2941                                         gb_tile_moden = 0;
2942                                         break;
2943                                 }
2944                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946                         }
2947                 } else if (num_rbs < 4) {
2948                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949                                 switch (reg_offset) {
2950                                 case 0:
2951                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955                                         break;
2956                                 case 1:
2957                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961                                         break;
2962                                 case 2:
2963                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967                                         break;
2968                                 case 3:
2969                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973                                         break;
2974                                 case 4:
2975                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978                                                          TILE_SPLIT(split_equal_to_row_size));
2979                                         break;
2980                                 case 5:
2981                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984                                         break;
2985                                 case 6:
2986                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990                                         break;
2991                                 case 7:
2992                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995                                                          TILE_SPLIT(split_equal_to_row_size));
2996                                         break;
2997                                 case 8:
2998                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000                                         break;
3001                                 case 9:
3002                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005                                         break;
3006                                 case 10:
3007                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                                         break;
3012                                 case 11:
3013                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                                         break;
3018                                 case 12:
3019                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023                                         break;
3024                                 case 13:
3025                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028                                         break;
3029                                 case 14:
3030                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034                                         break;
3035                                 case 16:
3036                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040                                         break;
3041                                 case 17:
3042                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046                                         break;
3047                                 case 27:
3048                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051                                         break;
3052                                 case 28:
3053                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057                                         break;
3058                                 case 29:
3059                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063                                         break;
3064                                 case 30:
3065                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                                         break;
3070                                 default:
3071                                         gb_tile_moden = 0;
3072                                         break;
3073                                 }
3074                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076                         }
3077                 }
3078                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079                         switch (reg_offset) {
3080                         case 0:
3081                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3085                                 break;
3086                         case 1:
3087                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3091                                 break;
3092                         case 2:
3093                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3097                                 break;
3098                         case 3:
3099                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3103                                 break;
3104                         case 4:
3105                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3109                                 break;
3110                         case 5:
3111                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3115                                 break;
3116                         case 6:
3117                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3121                                 break;
3122                         case 8:
3123                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3127                                 break;
3128                         case 9:
3129                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3133                                 break;
3134                         case 10:
3135                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3139                                 break;
3140                         case 11:
3141                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3145                                 break;
3146                         case 12:
3147                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3151                                 break;
3152                         case 13:
3153                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3157                                 break;
3158                         case 14:
3159                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3163                                 break;
3164                         default:
3165                                 gb_tile_moden = 0;
3166                                 break;
3167                         }
3168                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170                 }
3171         } else if (num_pipe_configs == 2) {
3172                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173                         switch (reg_offset) {
3174                         case 0:
3175                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179                                 break;
3180                         case 1:
3181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185                                 break;
3186                         case 2:
3187                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191                                 break;
3192                         case 3:
3193                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3196                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197                                 break;
3198                         case 4:
3199                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                                  TILE_SPLIT(split_equal_to_row_size));
3203                                 break;
3204                         case 5:
3205                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208                                 break;
3209                         case 6:
3210                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214                                 break;
3215                         case 7:
3216                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                                  TILE_SPLIT(split_equal_to_row_size));
3220                                 break;
3221                         case 8:
3222                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223                                                 PIPE_CONFIG(ADDR_SURF_P2);
3224                                 break;
3225                         case 9:
3226                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228                                                  PIPE_CONFIG(ADDR_SURF_P2));
3229                                 break;
3230                         case 10:
3231                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                                 break;
3236                         case 11:
3237                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3240                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241                                 break;
3242                         case 12:
3243                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3246                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                                 break;
3248                         case 13:
3249                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252                                 break;
3253                         case 14:
3254                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258                                 break;
3259                         case 16:
3260                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264                                 break;
3265                         case 17:
3266                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270                                 break;
3271                         case 27:
3272                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274                                                  PIPE_CONFIG(ADDR_SURF_P2));
3275                                 break;
3276                         case 28:
3277                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3280                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281                                 break;
3282                         case 29:
3283                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287                                 break;
3288                         case 30:
3289                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3292                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293                                 break;
3294                         default:
3295                                 gb_tile_moden = 0;
3296                                 break;
3297                         }
3298                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300                 }
3301                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302                         switch (reg_offset) {
3303                         case 0:
3304                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                                 break;
3309                         case 1:
3310                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3314                                 break;
3315                         case 2:
3316                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3320                                 break;
3321                         case 3:
3322                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3326                                 break;
3327                         case 4:
3328                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3332                                 break;
3333                         case 5:
3334                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3338                                 break;
3339                         case 6:
3340                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3344                                 break;
3345                         case 8:
3346                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3350                                 break;
3351                         case 9:
3352                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3356                                 break;
3357                         case 10:
3358                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3362                                 break;
3363                         case 11:
3364                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3368                                 break;
3369                         case 12:
3370                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3374                                 break;
3375                         case 13:
3376                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3380                                 break;
3381                         case 14:
3382                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3386                                 break;
3387                         default:
3388                                 gb_tile_moden = 0;
3389                                 break;
3390                         }
3391                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393                 }
3394         } else
3395                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410                              u32 se_num, u32 sh_num)
3411 {
3412         u32 data = INSTANCE_BROADCAST_WRITES;
3413
3414         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416         else if (se_num == 0xffffffff)
3417                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418         else if (sh_num == 0xffffffff)
3419                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420         else
3421                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422         WREG32(GRBM_GFX_INDEX, data);
3423 }
3424
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435         u32 i, mask = 0;
3436
3437         for (i = 0; i < bit_width; i++) {
3438                 mask <<= 1;
3439                 mask |= 1;
3440         }
3441         return mask;
3442 }
3443
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per shader engine
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs for the currently selected
 * SE/SH (CIK).  The caller is expected to have banked GRBM_GFX_INDEX
 * to the SE/SH of interest via cik_select_se_sh().
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* Hardware RB disables only count when bit 0 of the register
	 * is set; otherwise treat the field as "nothing disabled".
	 */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* merge in the user/driver-requested RB disables */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* keep only the bits that can correspond to real RBs on this SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3474
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per shader engine
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: walk every SE/SH and pack its disabled-RB bits into one
	 * bitmap.  grbm_idx_mutex serializes GRBM_GFX_INDEX banking done
	 * by cik_select_se_sh().
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii uses a wider per-SH field in the packed bitmap */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing before dropping the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* invert the disabled bitmap to get the enabled-RB mask */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Pass 2: program PA_SC_RASTER_CONFIG per SE, choosing the RB map
	 * from the two enable bits belonging to each SH.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled on this SH */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			/* consume this SH's two enable bits */
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3547
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-asic shader engine/pipe/cache limits and golden GB_ADDR_CONFIG */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU and RB count; distinguish by
		 * PCI device ID */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of columns,
	 * capped at 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* mirror the address config into the blocks that need it
	 * (HDP, display DMIF, both SDMA engines, UVD) */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active compute units across all SE/SH pairs */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write: preserve bits outside the masked fields */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the config settle before the engine is used */
	udelay(50);
}
3830
3831 /*
3832  * GPU scratch registers helpers function.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846         int i;
3847
3848         rdev->scratch.num_reg = 7;
3849         rdev->scratch.reg_base = SCRATCH_REG0;
3850         for (i = 0; i < rdev->scratch.num_reg; i++) {
3851                 rdev->scratch.free[i] = true;
3852                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853         }
3854 }
3855
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume();
3865  * Returns 0 on success, error on failure.
3866  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* grab a free CP scratch register for the test pattern */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a known value so we can tell whether the CP overwrote it */
	WREG32(scratch, 0xCAFEDEAD);
	/* test packet is 3 dwords: SET_UCONFIG_REG header, reg offset, value */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll the scratch register until the CP has executed the write */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3907
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917                                        int ridx)
3918 {
3919         struct radeon_ring *ring = &rdev->ring[ridx];
3920         u32 ref_and_mask;
3921
3922         switch (ring->idx) {
3923         case CAYMAN_RING_TYPE_CP1_INDEX:
3924         case CAYMAN_RING_TYPE_CP2_INDEX:
3925         default:
3926                 switch (ring->me) {
3927                 case 0:
3928                         ref_and_mask = CP2 << ring->pipe;
3929                         break;
3930                 case 1:
3931                         ref_and_mask = CP6 << ring->pipe;
3932                         break;
3933                 default:
3934                         return;
3935                 }
3936                 break;
3937         case RADEON_RING_TYPE_GFX_INDEX:
3938                 ref_and_mask = CP0;
3939                 break;
3940         }
3941
3942         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948         radeon_ring_write(ring, ref_and_mask);
3949         radeon_ring_write(ring, ref_and_mask);
3950         radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
 * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where this ring's fence sequence numbers land */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy writes seq - 1 with INT_SEL(0) (no interrupt) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real event uses INT_SEL(2) — cf. INT_SEL(0) on the dummy above */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3993
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
 * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where this ring's fence sequence numbers land */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* write the seq number and raise an interrupt (INT_SEL(2)) */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
4021
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034                              struct radeon_ring *ring,
4035                              struct radeon_semaphore *semaphore,
4036                              bool emit_wait)
4037 {
4038         uint64_t addr = semaphore->gpu_addr;
4039         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040
4041         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042         radeon_ring_write(ring, lower_32_bits(addr));
4043         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044
4045         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046                 /* Prevent the PFP from running ahead of the semaphore wait */
4047                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048                 radeon_ring_write(ring, 0x0);
4049         }
4050
4051         return true;
4052 }
4053
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU paging using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* one DMA_DATA packet moves at most 0x1fffff bytes, so split the
	 * copy into that many loops; each loop emits 7 dwords, plus 18
	 * dwords of headroom for the sync and fence packets.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for fences on the reservation object before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final packet synchronizes with the CP */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4125
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits an DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* rptr after this IB: wptr + 3 dwords for this
			 * SET_UCONFIG_REG + 4 for the IB packet below
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* same idea, but via a 5-dword WRITE_DATA packet
			 * into the writeback buffer
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, vm id in the top byte */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4180
4181 /**
4182  * cik_ib_test - basic gfx ring IB test
4183  *
4184  * @rdev: radeon_device pointer
4185  * @ring: radeon_ring structure holding ring information
4186  *
4187  * Allocate an IB and execute it on the gfx ring (CIK).
4188  * Provides a basic gfx ring test to verify that IBs are working.
4189  * Returns 0 on success, error on failure.
4190  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can detect the IB's write */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* the IB writes 0xDEADBEEF into the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence, then poll for the write to land */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4246
4247 /*
4248  * CP.
 * On CIK, gfx and compute now have independent command processors.
4250  *
4251  * GFX
4252  * Gfx consists of a single ring and can process both gfx jobs and
4253  * compute jobs.  The gfx CP consists of three microengines (ME):
4254  * PFP - Pre-Fetch Parser
4255  * ME - Micro Engine
4256  * CE - Constant Engine
4257  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4259  * used by the DE so that they can be loaded into cache in parallel
4260  * while the DE is processing state update packets.
4261  *
4262  * Compute
4263  * The compute CP consists of two microengines (ME):
4264  * MEC1 - Compute MicroEngine 1
4265  * MEC2 - Compute MicroEngine 2
4266  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4267  * The queues are exposed to userspace and are programmed directly
4268  * by the compute runtime.
4269  */
4270 /**
4271  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4272  *
4273  * @rdev: radeon_device pointer
4274  * @enable: enable or disable the MEs
4275  *
4276  * Halts or unhalts the gfx MEs.
4277  */
4278 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4279 {
4280         if (enable)
4281                 WREG32(CP_ME_CNTL, 0);
4282         else {
4283                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4284                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4285                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4286                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4287         }
4288         udelay(50);
4289 }
4290
4291 /**
4292  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4293  *
4294  * @rdev: radeon_device pointer
4295  *
4296  * Loads the gfx PFP, ME, and CE ucode.
4297  * Returns 0 for success, -EINVAL if the ucode is not available.
4298  */
4299 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4300 {
4301         int i;
4302
4303         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4304                 return -EINVAL;
4305
4306         cik_cp_gfx_enable(rdev, false);
4307
4308         if (rdev->new_fw) {
4309                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4310                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4311                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4312                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4313                 const struct gfx_firmware_header_v1_0 *me_hdr =
4314                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4315                 const __le32 *fw_data;
4316                 u32 fw_size;
4317
4318                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4319                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4320                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4321
4322                 /* PFP */
4323                 fw_data = (const __le32 *)
4324                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4325                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4326                 WREG32(CP_PFP_UCODE_ADDR, 0);
4327                 for (i = 0; i < fw_size; i++)
4328                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4329                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4330
4331                 /* CE */
4332                 fw_data = (const __le32 *)
4333                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4334                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4335                 WREG32(CP_CE_UCODE_ADDR, 0);
4336                 for (i = 0; i < fw_size; i++)
4337                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4338                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4339
4340                 /* ME */
4341                 fw_data = (const __be32 *)
4342                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4343                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4344                 WREG32(CP_ME_RAM_WADDR, 0);
4345                 for (i = 0; i < fw_size; i++)
4346                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4347                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4348                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4349         } else {
4350                 const __be32 *fw_data;
4351
4352                 /* PFP */
4353                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4354                 WREG32(CP_PFP_UCODE_ADDR, 0);
4355                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4356                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4357                 WREG32(CP_PFP_UCODE_ADDR, 0);
4358
4359                 /* CE */
4360                 fw_data = (const __be32 *)rdev->ce_fw->data;
4361                 WREG32(CP_CE_UCODE_ADDR, 0);
4362                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4363                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4364                 WREG32(CP_CE_UCODE_ADDR, 0);
4365
4366                 /* ME */
4367                 fw_data = (const __be32 *)rdev->me_fw->data;
4368                 WREG32(CP_ME_RAM_WADDR, 0);
4369                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4370                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4371                 WREG32(CP_ME_RAM_WADDR, 0);
4372         }
4373
4374         return 0;
4375 }
4376
4377 /**
4378  * cik_cp_gfx_start - start the gfx ring
4379  *
4380  * @rdev: radeon_device pointer
4381  *
4382  * Enables the ring and loads the clear state context and other
4383  * packets required to init the ring.
4384  * Returns 0 for success, error for failure.
4385  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the default state plus the 17 dwords of packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the CI default state (from clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4437
4438 /**
4439  * cik_cp_gfx_fini - stop the gfx ring
4440  *
4441  * @rdev: radeon_device pointer
4442  *
4443  * Stop the gfx ring and tear down the driver ring
4444  * info.
4445  */
4446 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4447 {
4448         cik_cp_gfx_enable(rdev, false);
4449         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4450 }
4451
4452 /**
4453  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4454  *
4455  * @rdev: radeon_device pointer
4456  *
4457  * Program the location and size of the gfx ring buffer
4458  * and test it to make sure it's working.
4459  * Returns 0 for success, error for failure.
4460  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the hw must not update the rptr copy */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes (address >> 8) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4527
4528 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4529                      struct radeon_ring *ring)
4530 {
4531         u32 rptr;
4532
4533         if (rdev->wb.enabled)
4534                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4535         else
4536                 rptr = RREG32(CP_RB0_RPTR);
4537
4538         return rptr;
4539 }
4540
4541 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4542                      struct radeon_ring *ring)
4543 {
4544         u32 wptr;
4545
4546         wptr = RREG32(CP_RB0_WPTR);
4547
4548         return wptr;
4549 }
4550
/* push the gfx ring's software write pointer out to the hardware */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back — presumably a posting read to flush the MMIO write;
	 * NOTE(review): confirm against the register-access rules for this hw
	 */
	(void)RREG32(CP_RB0_WPTR);
}
4557
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* read pointer is mirrored in the writeback buffer */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* read the HQD register for this queue; srbm_mutex
		 * serializes the me/pipe/queue selection
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4575
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* read the HQD register for this queue; srbm_mutex
		 * serializes the me/pipe/queue selection
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4594
/* publish the compute ring's write pointer via writeback + doorbell */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	/* ring the queue's doorbell so the hw picks up the new wptr */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4602
/* quiesce and reset one compute queue's HQD.
 * NOTE(review): the caller (cik_cp_compute_enable) holds srbm_mutex
 * around the cik_srbm_select() calls below.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait (up to usec_timeout) for the queue to go inactive */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default srbm selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4627
4628 /**
4629  * cik_cp_compute_enable - enable/disable the compute CP MEs
4630  *
4631  * @rdev: radeon_device pointer
4632  * @enable: enable or disable the MEs
4633  *
4634  * Halts or unhalts the compute MEs.
4635  */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt MEC1 and MEC2 and mark the compute rings unusable */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* give the halt/un-halt time to take effect */
	udelay(50);
}
4656
4657 /**
4658  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4659  *
4660  * @rdev: radeon_device pointer
4661  *
4662  * Loads the compute MEC1&2 ucode.
4663  * Returns 0 for success, -EINVAL if the ucode is not available.
4664  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before rewriting their ucode */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): assumes mec2_fw was loaded whenever
			 * new_fw is set on Kaveri — confirm against
			 * cik_init_microcode's fallback logic. */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: raw big-endian words, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - the legacy blob is reused for both MECs */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4727
4728 /**
4729  * cik_cp_compute_start - start the compute queues
4730  *
4731  * @rdev: radeon_device pointer
4732  *
4733  * Enable the compute queues.
4734  * Returns 0 for success, error for failure.
4735  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* un-halt the MECs; per-queue setup is done in cik_cp_compute_resume() */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4742
4743 /**
4744  * cik_cp_compute_fini - stop the compute queues
4745  *
4746  * @rdev: radeon_device pointer
4747  *
4748  * Stop the compute queues and tear down the driver queue
4749  * info.
4750  */
4751 static void cik_cp_compute_fini(struct radeon_device *rdev)
4752 {
4753         int i, idx, r;
4754
4755         cik_cp_compute_enable(rdev, false);
4756
4757         for (i = 0; i < 2; i++) {
4758                 if (i == 0)
4759                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4760                 else
4761                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4762
4763                 if (rdev->ring[idx].mqd_obj) {
4764                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4765                         if (unlikely(r != 0))
4766                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4767
4768                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4769                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4770
4771                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4772                         rdev->ring[idx].mqd_obj = NULL;
4773                 }
4774         }
4775 }
4776
4777 static void cik_mec_fini(struct radeon_device *rdev)
4778 {
4779         int r;
4780
4781         if (rdev->mec.hpd_eop_obj) {
4782                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4783                 if (unlikely(r != 0))
4784                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4785                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4786                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4787
4788                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4789                 rdev->mec.hpd_eop_obj = NULL;
4790         }
4791 }
4792
4793 #define MEC_HPD_SIZE 2048
4794
/*
 * cik_mec_init - allocate and clear the MEC HPD EOP buffer
 *
 * Sets up the driver's MEC topology bookkeeping and allocates a GTT
 * buffer object for the hardware pipe descriptors / EOP buffers, pins
 * it, and zeroes it.  Returns 0 on success or a negative error code;
 * on failure the BO is torn down via cik_mec_fini().
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* MEC_HPD_SIZE * 2 bytes per pipe; the reason for the
		 * factor of 2 is not documented here — TODO confirm */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4849
/*
 * Saved state of one hardware queue descriptor (HQD).  The fields
 * correspond to the CP_MQD_* / CP_HQD_* registers programmed in
 * cik_cp_compute_resume(); a copy is kept inside struct bonaire_mqd.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4888
/*
 * Memory queue descriptor (MQD) layout for Bonaire-class compute queues.
 * One zero-initialized instance lives in each compute ring's mqd_obj BO
 * and is filled in by cik_cp_compute_resume(); queue_state mirrors the
 * HQD registers written there.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4916
4917 /**
4918  * cik_cp_compute_resume - setup the compute queue registers
4919  *
4920  * @rdev: radeon_device pointer
4921  *
4922  * Program the compute queues and test them to make sure they
4923  * are working.
4924  * Returns 0 for success, error for failure.
4925  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* un-halt the MECs first */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate one MQD BO per compute ring */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* all HQD register writes below apply to this ring's queue */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait up to usec_timeout usecs for the HQD to drain */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size is log2(ring_size in dwords / 8) */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark the ring usable, then verify it actually works */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5157
/*
 * cik_cp_enable - enable/disable both the gfx and compute command processors
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the CPs
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5163
/*
 * cik_cp_load_microcode - load ucode for both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP ucode first, then the compute MEC ucode.
 * Returns 0 on success or the error from the first failing step.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5177
/*
 * cik_cp_fini - tear down both the gfx and compute command processors
 *
 * @rdev: radeon_device pointer
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5183
5184 static int cik_cp_resume(struct radeon_device *rdev)
5185 {
5186         int r;
5187
5188         cik_enable_gui_idle_interrupt(rdev, false);
5189
5190         r = cik_cp_load_microcode(rdev);
5191         if (r)
5192                 return r;
5193
5194         r = cik_cp_gfx_resume(rdev);
5195         if (r)
5196                 return r;
5197         r = cik_cp_compute_resume(rdev);
5198         if (r)
5199                 return r;
5200
5201         cik_enable_gui_idle_interrupt(rdev, true);
5202
5203         return 0;
5204 }
5205
/*
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Logs the GRBM/SRBM/SDMA/CP status registers to aid diagnosis of a hung
 * GPU; used by cik_gpu_soft_reset() before resetting.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5245
5246 /**
5247  * cik_gpu_check_soft_reset - check which blocks are busy
5248  *
5249  * @rdev: radeon_device pointer
5250  *
5251  * Check which blocks are busy and return the relevant reset
5252  * mask to be used by cik_gpu_soft_reset().
5253  * Returns a mask of the blocks to be reset.
5254  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG - both SDMA engines share the same register
	 * layout at different offsets */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5326
5327 /**
5328  * cik_gpu_soft_reset - soft reset GPU
5329  *
5330  * @rdev: radeon_device pointer
5331  * @reset_mask: mask of which blocks to reset
5332  *
5333  * Soft reset the blocks specified in @reset_mask.
5334  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged as hung, nothing to reset */
	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump current status and the last VM protection fault for debugging
	 * before tearing the engines down
	 */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt any SDMA engine that is about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before pulsing the reset lines */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the generic reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC soft reset is only done on dGPUs; skipped on IGPs,
	 * presumably because the MC is shared with the CPU there
	 */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: assert, delay, deassert */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* read back (result unused) -- presumably posts the write
		 * before the delay; TODO confirm
		 */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5457
/* GMCON register state saved/restored around a PCI config reset on
 * IGP parts (see kv_save_regs_for_reset()/kv_restore_regs_for_reset()).
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5463
/**
 * kv_save_regs_for_reset - save GMCON state before a PCI config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then clears
 * the RENG execute-on-power-up, execute-on-reg-update and stutter
 * bits -- presumably to keep the remote register engine idle across
 * the reset.  Restored by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5475
5476 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5477                                       struct kv_reset_save_regs *save)
5478 {
5479         int i;
5480
5481         WREG32(GMCON_PGFSM_WRITE, 0);
5482         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5483
5484         for (i = 0; i < 5; i++)
5485                 WREG32(GMCON_PGFSM_WRITE, 0);
5486
5487         WREG32(GMCON_PGFSM_WRITE, 0);
5488         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5489
5490         for (i = 0; i < 5; i++)
5491                 WREG32(GMCON_PGFSM_WRITE, 0);
5492
5493         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5494         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5495
5496         for (i = 0; i < 5; i++)
5497                 WREG32(GMCON_PGFSM_WRITE, 0);
5498
5499         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5500         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5501
5502         for (i = 0; i < 5; i++)
5503                 WREG32(GMCON_PGFSM_WRITE, 0);
5504
5505         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5506         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5507
5508         for (i = 0; i < 5; i++)
5509                 WREG32(GMCON_PGFSM_WRITE, 0);
5510
5511         WREG32(GMCON_PGFSM_WRITE, 0);
5512         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5513
5514         for (i = 0; i < 5; i++)
5515                 WREG32(GMCON_PGFSM_WRITE, 0);
5516
5517         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5518         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5519
5520         for (i = 0; i < 5; i++)
5521                 WREG32(GMCON_PGFSM_WRITE, 0);
5522
5523         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5524         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5525
5526         for (i = 0; i < 5; i++)
5527                 WREG32(GMCON_PGFSM_WRITE, 0);
5528
5529         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5530         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5531
5532         for (i = 0; i < 5; i++)
5533                 WREG32(GMCON_PGFSM_WRITE, 0);
5534
5535         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5536         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5537
5538         for (i = 0; i < 5; i++)
5539                 WREG32(GMCON_PGFSM_WRITE, 0);
5540
5541         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5542         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5543
5544         WREG32(GMCON_MISC3, save->gmcon_misc3);
5545         WREG32(GMCON_MISC, save->gmcon_misc);
5546         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5547 }
5548
/**
 * cik_gpu_pci_config_reset - reset the asic via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the CP, MEC, SDMA and RLC engines and the memory
 * controller, then resets the whole asic through PCI config space
 * (radeon_pci_config_reset()) and polls CONFIG_MEMSIZE until the
 * chip comes back.  On IGPs the GMCON registers are saved and
 * restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP: preserve GMCON state across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * as all ones until the asic is alive again
	 */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5611
/**
 * cik_asic_reset - attempt to reset the hung blocks of the GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt to reset them.
 * Tries a soft reset of the hung blocks first; if some blocks are
 * still hung afterwards and the radeon_hard_reset module option is
 * set, falls back to a full PCI config reset.  The BIOS scratch
 * "engine hung" flag is set while the GPU is considered hung and
 * cleared again once no block reports hung.
 * Always returns 0.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5646
5647 /**
5648  * cik_gfx_is_lockup - check if the 3D engine is locked up
5649  *
5650  * @rdev: radeon_device pointer
5651  * @ring: radeon_ring structure holding ring information
5652  *
5653  * Check if the 3D engine is locked up (CIK).
5654  * Returns true if the engine is locked, false if not.
5655  */
5656 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5657 {
5658         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5659
5660         if (!(reset_mask & (RADEON_RESET_GFX |
5661                             RADEON_RESET_COMPUTE |
5662                             RADEON_RESET_CP))) {
5663                 radeon_ring_lockup_update(rdev, ring);
5664                 return false;
5665         }
5666         return radeon_ring_test_lockup(rdev, ring);
5667 }
5668
5669 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP: zero registers 0x2c14-0x2c24 for 32 instances,
	 * stride 0x18 (unnamed HDP surface registers)
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce the MC while reprogramming the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits hold vram_end >> 24, low 16 bits
	 * hold vram_start >> 24
	 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK; program an empty aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5725
5726 /**
5727  * cik_mc_init - initialize the memory controller driver params
5728  *
5729  * @rdev: radeon_device pointer
5730  *
5731  * Look up the amount of vram, vram width, and decide how to place
5732  * vram and gart within the GPU's physical address space (CIK).
5733  * Returns 0 for success.
5734  */
5735 static int cik_mc_init(struct radeon_device *rdev)
5736 {
5737         u32 tmp;
5738         int chansize, numchan;
5739
5740         /* Get VRAM informations */
5741         rdev->mc.vram_is_ddr = true;
5742         tmp = RREG32(MC_ARB_RAMCFG);
5743         if (tmp & CHANSIZE_MASK) {
5744                 chansize = 64;
5745         } else {
5746                 chansize = 32;
5747         }
5748         tmp = RREG32(MC_SHARED_CHMAP);
5749         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5750         case 0:
5751         default:
5752                 numchan = 1;
5753                 break;
5754         case 1:
5755                 numchan = 2;
5756                 break;
5757         case 2:
5758                 numchan = 4;
5759                 break;
5760         case 3:
5761                 numchan = 8;
5762                 break;
5763         case 4:
5764                 numchan = 3;
5765                 break;
5766         case 5:
5767                 numchan = 6;
5768                 break;
5769         case 6:
5770                 numchan = 10;
5771                 break;
5772         case 7:
5773                 numchan = 12;
5774                 break;
5775         case 8:
5776                 numchan = 16;
5777                 break;
5778         }
5779         rdev->mc.vram_width = numchan * chansize;
5780         /* Could aper size report 0 ? */
5781         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5782         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5783         /* size in MB on si */
5784         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5785         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5786         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5787         si_vram_gtt_location(rdev, &rdev->mc);
5788         radeon_update_bandwidth_info(rdev);
5789
5790         return 0;
5791 }
5792
5793 /*
5794  * GART
5795  * VMID 0 is the physical GPU addresses as used by the kernel.
5796  * VMIDs 1-15 are used for userspace clients and are handled
5797  * by the radeon vm/hsa code.
5798  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only flush context 0 here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5814
/* Program the SH_MEM registers of VMIDs 8-15 (the VMIDs used by
 * amdkfd, cf. cik_vm_init()) with a non-cached default MTYPE,
 * unaligned access mode, and both 16-bit SH_MEM_BASES fields set
 * to 0x6000.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* srbm_mutex serializes SRBM bank selection */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* switch back to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5836
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control; (0xA << 7) is an undocumented field value
	 * kept from the original bring-up sequence
	 */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: kernel GART mapping, faults fall back to the
	 * dummy page
	 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* clear 0x15D4-0x15DC -- unnamed registers, purpose not
	 * documented here
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* page table base registers for VMIDs 1-7 and 8-15 live in two
	 * separate register banks
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* KV only: make sure the VM is not bypassed */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5959
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK).  The per-VMID page table
 * base addresses of contexts 1-15 are saved first so that
 * cik_pcie_gart_enable() can restore them later.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* remember the page table bases; VMIDs 1-7 and 8-15 live in
	 * two separate register banks
	 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5998
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the page
 * tables, frees the VRAM-backed page table and releases the GART
 * structures.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
6012
6013 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
6026
6027 /*
6028  * vm
6029  * VMID 0 is the physical GPU addresses as used by the kernel.
6030  * VMIDs 1-15 are used for userspace clients and are handled
6031  * by the radeon vm/hsa code.
6032  */
6033 /**
6034  * cik_vm_init - cik vm init callback
6035  *
6036  * @rdev: radeon_device pointer
6037  *
6038  * Inits cik specific vm parameters (number of VMs, base of vram for
6039  * VMIDs 1-15) (CIK).
6040  * Returns 0 for success.
6041  */
6042 int cik_vm_init(struct radeon_device *rdev)
6043 {
6044         /*
6045          * number of VMs
6046          * VMID 0 is reserved for System
6047          * radeon graphics/compute will use VMIDs 1-7
6048          * amdkfd will use VMIDs 8-15
6049          */
6050         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6051         /* base offset of vram pages */
6052         if (rdev->flags & RADEON_IS_IGP) {
6053                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
6054                 tmp <<= 22;
6055                 rdev->vm_manager.vram_base_offset = tmp;
6056         } else
6057                 rdev->vm_manager.vram_base_offset = 0;
6058
6059         return 0;
6060 }
6061
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty; there is no asic-specific VM state to free.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
6072
6073 /**
6074  * cik_vm_decode_fault - print human readable fault info
6075  *
6076  * @rdev: radeon_device pointer
6077  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6078  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6079  *
6080  * Print human readable fault information (CIK).
6081  */
6082 static void cik_vm_decode_fault(struct radeon_device *rdev,
6083                                 u32 status, u32 addr, u32 mc_client)
6084 {
6085         u32 mc_id;
6086         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6087         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6088         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6089                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6090
6091         if (rdev->family == CHIP_HAWAII)
6092                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6093         else
6094                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6095
6096         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6097                protections, vmid, addr,
6098                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6099                block, mc_client, mc_id);
6100 }
6101
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is updated and TLB flushed
 * @pd_addr: physical address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base for this VMID; VMIDs 0-7 and
	 * 8-15 live in two separate register banks
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs: select the VMID via SRBM_GFX_CNTL first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6183
6184 /*
6185  * RLC
6186  * The RLC is a multi-purpose microengine that handles a
6187  * variety of functions, the most important of which is
6188  * the interrupt controller.
6189  */
6190 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6191                                           bool enable)
6192 {
6193         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6194
6195         if (enable)
6196                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6197         else
6198                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6199         WREG32(CP_INT_CNTL_RING0, tmp);
6200 }
6201
6202 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6203 {
6204         u32 tmp;
6205
6206         tmp = RREG32(RLC_LB_CNTL);
6207         if (enable)
6208                 tmp |= LOAD_BALANCE_ENABLE;
6209         else
6210                 tmp &= ~LOAD_BALANCE_ENABLE;
6211         WREG32(RLC_LB_CNTL, tmp);
6212 }
6213
/* Poll until the RLC serdes masters report idle.  Each register is
 * polled for at most rdev->usec_timeout microseconds; timeouts are
 * silently ignored.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* wait for the per-CU masters of every SE/SH combination;
	 * grbm_idx_mutex serializes the SE/SH bank selection
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* back to broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* then wait for the non-CU masters (SE/GC/TC) as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6240
6241 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6242 {
6243         u32 tmp;
6244
6245         tmp = RREG32(RLC_CNTL);
6246         if (tmp != rlc)
6247                 WREG32(RLC_CNTL, rlc);
6248 }
6249
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later with cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded by usec_timeout) for the RLC GPM to idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		/* drain any in-flight serdes transactions */
		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6273
/* Ask the RLC to enter safe mode and wait for the handshake:
 * first for GFX power and clock status to assert, then for the RLC
 * to acknowledge by clearing REQ.  Both polls time out silently.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ cleared == RLC has consumed the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6294
6295 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6296 {
6297         u32 tmp;
6298
6299         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6300         WREG32(RLC_GPR_REG2, tmp);
6301 }
6302
6303 /**
6304  * cik_rlc_stop - stop the RLC ME
6305  *
6306  * @rdev: radeon_device pointer
6307  *
6308  * Halt the RLC ME (MicroEngine) (CIK).
6309  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear RLC_ENABLE */
	WREG32(RLC_CNTL, 0);

	/* mask the gui idle interrupts while the RLC is halted */
	cik_enable_gui_idle_interrupt(rdev, false);

	/* let outstanding serdes transactions drain before returning */
	cik_wait_for_rlc_serdes(rdev);
}
6318
6319 /**
6320  * cik_rlc_start - start the RLC ME
6321  *
6322  * @rdev: radeon_device pointer
6323  *
6324  * Unhalt the RLC ME (MicroEngine) (CIK).
6325  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	/* re-enable the gui idle interrupts masked by cik_rlc_stop() */
	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC microengine time to come up */
	udelay(50);
}
6334
6335 /**
6336  * cik_rlc_resume - setup the RLC hw
6337  *
6338  * @rdev: radeon_device pointer
6339  *
6340  * Initialize the RLC registers, load the ucode,
6341  * and start the RLC (CIK).
6342  * Returns 0 for success, -EINVAL if the ucode is not available.
6343  */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program the load-balancing parameters with broadcast se/sh
	 * selection, serialized by grbm_idx_mutex
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload with a header
		 * that gives the ucode offset, size and version
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: raw big-endian words, fixed per-family size */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6426
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for GFX.  The serdes override is broadcast while
 * the RLC is halted, then the saved RLC state is restored.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to every CU on every SE/SH */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): the repeated reads below discard their
		 * result; presumably a required flush/settle sequence
		 * before disabling CGCG — confirm against hw docs.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6464
/* Enable/disable medium-grain clock gating (MGCG) for GFX, including
 * the related CP/RLC memory light sleep and CGTS shader-module gating.
 * As with CGCG, serdes overrides are programmed with the RLC halted.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the MGCG override to all serdes masters */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS shader-module gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* disable path: force overrides on, turn off light sleep */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6548
/* MC/ATC/VM registers carrying the shared MGCG (MC_CG_ENABLE) and
 * light-sleep (MC_LS_ENABLE) control bits; iterated by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6561
6562 static void cik_enable_mc_ls(struct radeon_device *rdev,
6563                              bool enable)
6564 {
6565         int i;
6566         u32 orig, data;
6567
6568         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6569                 orig = data = RREG32(mc_cg_registers[i]);
6570                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6571                         data |= MC_LS_ENABLE;
6572                 else
6573                         data &= ~MC_LS_ENABLE;
6574                 if (data != orig)
6575                         WREG32(mc_cg_registers[i], data);
6576         }
6577 }
6578
6579 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6580                                bool enable)
6581 {
6582         int i;
6583         u32 orig, data;
6584
6585         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6586                 orig = data = RREG32(mc_cg_registers[i]);
6587                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6588                         data |= MC_CG_ENABLE;
6589                 else
6590                         data &= ~MC_CG_ENABLE;
6591                 if (data != orig)
6592                         WREG32(mc_cg_registers[i], data);
6593         }
6594 }
6595
6596 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6597                                  bool enable)
6598 {
6599         u32 orig, data;
6600
6601         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6602                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6603                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6604         } else {
6605                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6606                 data |= 0xff000000;
6607                 if (data != orig)
6608                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6609
6610                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6611                 data |= 0xff000000;
6612                 if (data != orig)
6613                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6614         }
6615 }
6616
6617 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6618                                  bool enable)
6619 {
6620         u32 orig, data;
6621
6622         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6623                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6624                 data |= 0x100;
6625                 if (orig != data)
6626                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6627
6628                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6629                 data |= 0x100;
6630                 if (orig != data)
6631                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6632         } else {
6633                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6634                 data &= ~0x100;
6635                 if (orig != data)
6636                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6637
6638                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6639                 data &= ~0x100;
6640                 if (orig != data)
6641                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6642         }
6643 }
6644
/* Enable/disable UVD medium-grain clock gating via the indirect
 * UVD context registers and the DCM bit in UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value just read is discarded; the write
		 * always programs 0xfff.  Presumably the read only cycles
		 * the indirect-register interface — confirm against hw docs.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6670
6671 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6672                                bool enable)
6673 {
6674         u32 orig, data;
6675
6676         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6677
6678         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6679                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6680                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6681         else
6682                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6683                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6684
6685         if (orig != data)
6686                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6687 }
6688
6689 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6690                                 bool enable)
6691 {
6692         u32 orig, data;
6693
6694         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6695
6696         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6697                 data &= ~CLOCK_GATING_DIS;
6698         else
6699                 data |= CLOCK_GATING_DIS;
6700
6701         if (orig != data)
6702                 WREG32(HDP_HOST_PATH_CNTL, data);
6703 }
6704
6705 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6706                               bool enable)
6707 {
6708         u32 orig, data;
6709
6710         orig = data = RREG32(HDP_MEM_POWER_LS);
6711
6712         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6713                 data |= HDP_LS_ENABLE;
6714         else
6715                 data &= ~HDP_LS_ENABLE;
6716
6717         if (orig != data)
6718                 WREG32(HDP_MEM_POWER_LS, data);
6719 }
6720
/* Apply clock-gating settings for the blocks selected in @block.
 * For GFX, MGCG must be enabled before CGCG and disabled after it
 * (see the "order matters!" note below); MC gating is skipped on IGPs.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6768
/* Enable clock gating at init: GFX first, then UVD internal CG,
 * then the remaining blocks in one batch.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6783
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6794
6795 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6796                                           bool enable)
6797 {
6798         u32 data, orig;
6799
6800         orig = data = RREG32(RLC_PG_CNTL);
6801         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6802                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6803         else
6804                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6805         if (orig != data)
6806                 WREG32(RLC_PG_CNTL, data);
6807 }
6808
6809 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6810                                           bool enable)
6811 {
6812         u32 data, orig;
6813
6814         orig = data = RREG32(RLC_PG_CNTL);
6815         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6816                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6817         else
6818                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6819         if (orig != data)
6820                 WREG32(RLC_PG_CNTL, data);
6821 }
6822
6823 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6824 {
6825         u32 data, orig;
6826
6827         orig = data = RREG32(RLC_PG_CNTL);
6828         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6829                 data &= ~DISABLE_CP_PG;
6830         else
6831                 data |= DISABLE_CP_PG;
6832         if (orig != data)
6833                 WREG32(RLC_PG_CNTL, data);
6834 }
6835
6836 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6837 {
6838         u32 data, orig;
6839
6840         orig = data = RREG32(RLC_PG_CNTL);
6841         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6842                 data &= ~DISABLE_GDS_PG;
6843         else
6844                 data |= DISABLE_GDS_PG;
6845         if (orig != data)
6846                 WREG32(RLC_PG_CNTL, data);
6847 }
6848
6849 #define CP_ME_TABLE_SIZE    96
6850 #define CP_ME_TABLE_OFFSET  2048
6851 #define CP_MEC_TABLE_OFFSET 4096
6852
/* Copy the CP jump tables for each microengine (CE, PFP, ME, MEC and,
 * on Kaveri, MEC2) into the RLC's cp_table buffer, packing them
 * back-to-back.  New-style firmware carries per-image offsets/sizes in
 * its header; legacy firmware uses fixed offsets and a fixed table size.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri also has a second compute microengine (MEC2) */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* pick the firmware image for this microengine:
			 * 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2
			 */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* payload is little-endian; store table LE in the bo */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: big-endian words at fixed offsets */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6936
/* Enable/disable GFX coarse-grain power gating (GFX_PG_ENABLE plus the
 * RLC auto power-gating control).
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result of this read is discarded —
		 * presumably a dummy read to wake the gfx block after
		 * disabling power gating; confirm against hw docs.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6966
6967 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6968 {
6969         u32 mask = 0, tmp, tmp1;
6970         int i;
6971
6972         mutex_lock(&rdev->grbm_idx_mutex);
6973         cik_select_se_sh(rdev, se, sh);
6974         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6975         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6976         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6977         mutex_unlock(&rdev->grbm_idx_mutex);
6978
6979         tmp &= 0xffff0000;
6980
6981         tmp |= tmp1;
6982         tmp >>= 16;
6983
6984         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6985                 mask <<= 1;
6986                 mask |= 1;
6987         }
6988
6989         return (~tmp) & mask;
6990 }
6991
6992 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6993 {
6994         u32 i, j, k, active_cu_number = 0;
6995         u32 mask, counter, cu_bitmap;
6996         u32 tmp = 0;
6997
6998         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6999                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
7000                         mask = 1;
7001                         cu_bitmap = 0;
7002                         counter = 0;
7003                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
7004                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
7005                                         if (counter < 2)
7006                                                 cu_bitmap |= mask;
7007                                         counter ++;
7008                                 }
7009                                 mask <<= 1;
7010                         }
7011
7012                         active_cu_number += counter;
7013                         tmp |= (cu_bitmap << (i * 16 + j * 8));
7014                 }
7015         }
7016
7017         WREG32(RLC_PG_AO_CU_MASK, tmp);
7018
7019         tmp = RREG32(RLC_MAX_PG_CU);
7020         tmp &= ~MAX_PU_CU_MASK;
7021         tmp |= MAX_PU_CU(active_cu_number);
7022         WREG32(RLC_MAX_PG_CU, tmp);
7023 }
7024
7025 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7026                                        bool enable)
7027 {
7028         u32 data, orig;
7029
7030         orig = data = RREG32(RLC_PG_CNTL);
7031         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7032                 data |= STATIC_PER_CU_PG_ENABLE;
7033         else
7034                 data &= ~STATIC_PER_CU_PG_ENABLE;
7035         if (orig != data)
7036                 WREG32(RLC_PG_CNTL, data);
7037 }
7038
7039 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7040                                         bool enable)
7041 {
7042         u32 data, orig;
7043
7044         orig = data = RREG32(RLC_PG_CNTL);
7045         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7046                 data |= DYN_PER_CU_PG_ENABLE;
7047         else
7048                 data &= ~DYN_PER_CU_PG_ENABLE;
7049         if (orig != data)
7050                 WREG32(RLC_PG_CNTL, data);
7051 }
7052
/* RLC scratch-space offsets (in dwords) for the PG state blocks */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/**
 * cik_init_gfx_cgpg - set up the RLC for coarse grain GFX powergating
 *
 * @rdev: radeon_device pointer
 *
 * Program the clear state descriptor and the save/restore register list
 * into RLC scratch space, then configure the PG source, idle poll count
 * and the various PG delay registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	/* write the clear state descriptor (hi addr, lo addr, size) to
	 * RLC scratch; zero it out when no clear state buffer exists.
	 */
	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	/* upload the save/restore register list, if present */
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* set the PG source; only write back on change */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7104
/**
 * cik_update_gfx_pg - enable/disable all GFX powergating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse grain, static medium grain and dynamic medium grain
 * GFX powergating; each helper checks its own pg flag.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7111
7112 u32 cik_get_csb_size(struct radeon_device *rdev)
7113 {
7114         u32 count = 0;
7115         const struct cs_section_def *sect = NULL;
7116         const struct cs_extent_def *ext = NULL;
7117
7118         if (rdev->rlc.cs_data == NULL)
7119                 return 0;
7120
7121         /* begin clear state */
7122         count += 2;
7123         /* context control state */
7124         count += 3;
7125
7126         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7127                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7128                         if (sect->id == SECT_CONTEXT)
7129                                 count += 2 + ext->reg_count;
7130                         else
7131                                 return 0;
7132                 }
7133         }
7134         /* pa_sc_raster_config/pa_sc_raster_config1 */
7135         count += 4;
7136         /* end clear state */
7137         count += 2;
7138         /* clear state */
7139         count += 2;
7140
7141         return count;
7142 }
7143
7144 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7145 {
7146         u32 count = 0, i;
7147         const struct cs_section_def *sect = NULL;
7148         const struct cs_extent_def *ext = NULL;
7149
7150         if (rdev->rlc.cs_data == NULL)
7151                 return;
7152         if (buffer == NULL)
7153                 return;
7154
7155         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7156         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7157
7158         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7159         buffer[count++] = cpu_to_le32(0x80000000);
7160         buffer[count++] = cpu_to_le32(0x80000000);
7161
7162         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7163                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7164                         if (sect->id == SECT_CONTEXT) {
7165                                 buffer[count++] =
7166                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7167                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7168                                 for (i = 0; i < ext->reg_count; i++)
7169                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7170                         } else {
7171                                 return;
7172                         }
7173                 }
7174         }
7175
7176         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7177         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7178         switch (rdev->family) {
7179         case CHIP_BONAIRE:
7180                 buffer[count++] = cpu_to_le32(0x16000012);
7181                 buffer[count++] = cpu_to_le32(0x00000000);
7182                 break;
7183         case CHIP_KAVERI:
7184                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7185                 buffer[count++] = cpu_to_le32(0x00000000);
7186                 break;
7187         case CHIP_KABINI:
7188         case CHIP_MULLINS:
7189                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7190                 buffer[count++] = cpu_to_le32(0x00000000);
7191                 break;
7192         case CHIP_HAWAII:
7193                 buffer[count++] = cpu_to_le32(0x3a00161a);
7194                 buffer[count++] = cpu_to_le32(0x0000002e);
7195                 break;
7196         default:
7197                 buffer[count++] = cpu_to_le32(0x00000000);
7198                 buffer[count++] = cpu_to_le32(0x00000000);
7199                 break;
7200         }
7201
7202         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7203         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7204
7205         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7206         buffer[count++] = cpu_to_le32(0);
7207 }
7208
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any pg flag is set, enable SCK slowdown on power up/down, set up
 * GFX coarse grain PG (plus CP and GDS PG) when GFX_PG is supported,
 * program the always-on CU mask and finally enable all GFX PG features.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7223
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GFX PG features enabled by cik_init_pg(), including CP
 * and GDS PG when GFX_PG is supported.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7234
7235 /*
7236  * Interrupts
7237  * Starting with r6xx, interrupts are handled via a ring buffer.
7238  * Ring buffers are areas of GPU accessible memory that the GPU
7239  * writes interrupt vectors into and the host reads vectors out of.
7240  * There is a rptr (read pointer) that determines where the
7241  * host is currently reading, and a wptr (write pointer)
7242  * which determines where the GPU has written.  When the
7243  * pointers are equal, the ring is idle.  When the GPU
7244  * writes vectors to the ring buffer, it increments the
7245  * wptr.  When there is an interrupt, the host then starts
7246  * fetching commands and processing them until the pointers are
7247  * equal again at which point it updates the rptr.
7248  */
7249
7250 /**
7251  * cik_enable_interrupts - Enable the interrupt ring buffer
7252  *
7253  * @rdev: radeon_device pointer
7254  *
7255  * Enable the interrupt ring buffer (CIK).
7256  */
7257 static void cik_enable_interrupts(struct radeon_device *rdev)
7258 {
7259         u32 ih_cntl = RREG32(IH_CNTL);
7260         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7261
7262         ih_cntl |= ENABLE_INTR;
7263         ih_rb_cntl |= IH_RB_ENABLE;
7264         WREG32(IH_CNTL, ih_cntl);
7265         WREG32(IH_RB_CNTL, ih_rb_cntl);
7266         rdev->ih.enabled = true;
7267 }
7268
7269 /**
7270  * cik_disable_interrupts - Disable the interrupt ring buffer
7271  *
7272  * @rdev: radeon_device pointer
7273  *
7274  * Disable the interrupt ring buffer (CIK).
7275  */
7276 static void cik_disable_interrupts(struct radeon_device *rdev)
7277 {
7278         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7279         u32 ih_cntl = RREG32(IH_CNTL);
7280
7281         ih_rb_cntl &= ~IH_RB_ENABLE;
7282         ih_cntl &= ~ENABLE_INTR;
7283         WREG32(IH_RB_CNTL, ih_rb_cntl);
7284         WREG32(IH_CNTL, ih_cntl);
7285         /* set rptr, wptr to 0 */
7286         WREG32(IH_RB_RPTR, 0);
7287         WREG32(IH_RB_WPTR, 0);
7288         rdev->ih.enabled = false;
7289         rdev->ih.rptr = 0;
7290 }
7291
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits, clear the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: mask the trap interrupt on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: ME1/ME2, pipes 0-3 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve only the polarity bit of each pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7368
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* free the IH ring again on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7450
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* gfx ring: keep the context bits, always take priv faults */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	/* start from the current state with the enable bits cleared;
	 * they are re-set below for each requested source.
	 */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1 pipe 0 is handled here */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank: requested either by the vblank irq or a pending pflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* commit the accumulated enable state to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pflip interrupts are always left unmasked per crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7651
7652 /**
7653  * cik_irq_ack - ack interrupt sources
7654  *
7655  * @rdev: radeon_device pointer
7656  *
7657  * Ack interrupt sources on the GPU (vblanks, hpd,
7658  * etc.) (CIK).  Certain interrupts sources are sw
7659  * generated and do not require an explicit ack.
7660  */
7661 static inline void cik_irq_ack(struct radeon_device *rdev)
7662 {
7663         u32 tmp;
7664
7665         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7666         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7667         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7668         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7669         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7670         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7671         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7672
7673         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7674                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7675         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7676                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7677         if (rdev->num_crtc >= 4) {
7678                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7679                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7680                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7681                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7682         }
7683         if (rdev->num_crtc >= 6) {
7684                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7685                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7686                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7687                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7688         }
7689
7690         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7691                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7692                        GRPH_PFLIP_INT_CLEAR);
7693         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7694                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7695                        GRPH_PFLIP_INT_CLEAR);
7696         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7697                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7698         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7699                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7700         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7701                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7702         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7703                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7704
7705         if (rdev->num_crtc >= 4) {
7706                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7707                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7708                                GRPH_PFLIP_INT_CLEAR);
7709                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7710                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7711                                GRPH_PFLIP_INT_CLEAR);
7712                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7713                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7714                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7715                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7716                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7717                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7718                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7719                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7720         }
7721
7722         if (rdev->num_crtc >= 6) {
7723                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7724                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7725                                GRPH_PFLIP_INT_CLEAR);
7726                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7727                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7728                                GRPH_PFLIP_INT_CLEAR);
7729                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7730                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7731                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7732                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7733                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7734                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7735                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7736                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7737         }
7738
7739         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7740                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7741                 tmp |= DC_HPDx_INT_ACK;
7742                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7743         }
7744         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7745                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7746                 tmp |= DC_HPDx_INT_ACK;
7747                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7748         }
7749         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7750                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7751                 tmp |= DC_HPDx_INT_ACK;
7752                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7753         }
7754         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7755                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7756                 tmp |= DC_HPDx_INT_ACK;
7757                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7758         }
7759         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7760                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7761                 tmp |= DC_HPDx_INT_ACK;
7762                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7763         }
7764         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7765                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7766                 tmp |= DC_HPDx_INT_ACK;
7767                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7768         }
7769         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7770                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7771                 tmp |= DC_HPDx_RX_INT_ACK;
7772                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7773         }
7774         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7775                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7776                 tmp |= DC_HPDx_RX_INT_ACK;
7777                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7778         }
7779         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7780                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7781                 tmp |= DC_HPDx_RX_INT_ACK;
7782                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7783         }
7784         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7785                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7786                 tmp |= DC_HPDx_RX_INT_ACK;
7787                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7788         }
7789         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7790                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7791                 tmp |= DC_HPDx_RX_INT_ACK;
7792                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7793         }
7794         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7795                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7796                 tmp |= DC_HPDx_RX_INT_ACK;
7797                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7798         }
7799 }
7800
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Masks all interrupt sources, then acknowledges anything that
 * fired in the meantime so no stale status bits are left latched.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* clear the per-source enable state as well */
	cik_disable_interrupt_state(rdev);
}
7816
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	/* RLC must be halted once interrupts are off */
	cik_rlc_stop(rdev);
}
7830
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw (disable irqs + stop RLC) before freeing the ring */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7845
7846 /**
7847  * cik_get_ih_wptr - get the IH ring buffer wptr
7848  *
7849  * @rdev: radeon_device pointer
7850  *
7851  * Get the IH ring buffer wptr from either the register
7852  * or the writeback memory buffer (CIK).  Also check for
7853  * ring buffer overflow and deal with it.
7854  * Used by cik_irq_process().
7855  * Returns the value of the wptr.
7856  */
7857 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7858 {
7859         u32 wptr, tmp;
7860
7861         if (rdev->wb.enabled)
7862                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7863         else
7864                 wptr = RREG32(IH_RB_WPTR);
7865
7866         if (wptr & RB_OVERFLOW) {
7867                 wptr &= ~RB_OVERFLOW;
7868                 /* When a ring buffer overflow happen start parsing interrupt
7869                  * from the last not overwritten vector (wptr + 16). Hopefully
7870                  * this should allow us to catchup.
7871                  */
7872                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7873                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7874                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7875                 tmp = RREG32(IH_RB_CNTL);
7876                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7877                 WREG32(IH_RB_CNTL, tmp);
7878         }
7879         return (wptr & rdev->ih.ptr_mask);
7880 }
7881
7882 /*        CIK IV Ring
7883  * Each IV ring entry is 128 bits:
7884  * [7:0]    - interrupt source id
7885  * [31:8]   - reserved
7886  * [59:32]  - interrupt source data
7887  * [63:60]  - reserved
7888  * [71:64]  - RINGID
7889  *            CP:
7890  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7891  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7892  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7893  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7894  *            PIPE_ID - ME0 0=3D
7895  *                    - ME1&2 compute dispatcher (4 pipes each)
7896  *            SDMA:
7897  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7898  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7899  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7900  * [79:72]  - VMID
7901  * [95:80]  - PASID
7902  * [127:96] - reserved
7903  */
7904 /**
7905  * cik_irq_process - interrupt handler
7906  *
7907  * @rdev: radeon_device pointer
7908  *
 * Interrupt handler (CIK).  Walk the IH ring,
7910  * ack interrupts and schedule work to handle
7911  * interrupt events.
7912  * Returns irq process return code.
7913  */
7914 int cik_irq_process(struct radeon_device *rdev)
7915 {
7916         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7917         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7918         u32 wptr;
7919         u32 rptr;
7920         u32 src_id, src_data, ring_id;
7921         u8 me_id, pipe_id, queue_id;
7922         u32 ring_index;
7923         bool queue_hotplug = false;
7924         bool queue_dp = false;
7925         bool queue_reset = false;
7926         u32 addr, status, mc_client;
7927         bool queue_thermal = false;
7928
7929         if (!rdev->ih.enabled || rdev->shutdown)
7930                 return IRQ_NONE;
7931
7932         wptr = cik_get_ih_wptr(rdev);
7933
7934 restart_ih:
7935         /* is somebody else already processing irqs? */
7936         if (atomic_xchg(&rdev->ih.lock, 1))
7937                 return IRQ_NONE;
7938
7939         rptr = rdev->ih.rptr;
7940         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7941
7942         /* Order reading of wptr vs. reading of IH ring data */
7943         rmb();
7944
7945         /* display interrupts */
7946         cik_irq_ack(rdev);
7947
7948         while (rptr != wptr) {
7949                 /* wptr/rptr are in bytes! */
7950                 ring_index = rptr / 4;
7951
7952                 radeon_kfd_interrupt(rdev,
7953                                 (const void *) &rdev->ih.ring[ring_index]);
7954
7955                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7956                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7957                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7958
7959                 switch (src_id) {
7960                 case 1: /* D1 vblank/vline */
7961                         switch (src_data) {
7962                         case 0: /* D1 vblank */
7963                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7964                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7965
7966                                 if (rdev->irq.crtc_vblank_int[0]) {
7967                                         drm_handle_vblank(rdev->ddev, 0);
7968                                         rdev->pm.vblank_sync = true;
7969                                         wake_up(&rdev->irq.vblank_queue);
7970                                 }
7971                                 if (atomic_read(&rdev->irq.pflip[0]))
7972                                         radeon_crtc_handle_vblank(rdev, 0);
7973                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7974                                 DRM_DEBUG("IH: D1 vblank\n");
7975
7976                                 break;
7977                         case 1: /* D1 vline */
7978                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7979                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7980
7981                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7982                                 DRM_DEBUG("IH: D1 vline\n");
7983
7984                                 break;
7985                         default:
7986                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7987                                 break;
7988                         }
7989                         break;
7990                 case 2: /* D2 vblank/vline */
7991                         switch (src_data) {
7992                         case 0: /* D2 vblank */
7993                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7994                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7995
7996                                 if (rdev->irq.crtc_vblank_int[1]) {
7997                                         drm_handle_vblank(rdev->ddev, 1);
7998                                         rdev->pm.vblank_sync = true;
7999                                         wake_up(&rdev->irq.vblank_queue);
8000                                 }
8001                                 if (atomic_read(&rdev->irq.pflip[1]))
8002                                         radeon_crtc_handle_vblank(rdev, 1);
8003                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8004                                 DRM_DEBUG("IH: D2 vblank\n");
8005
8006                                 break;
8007                         case 1: /* D2 vline */
8008                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
8009                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8010
8011                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8012                                 DRM_DEBUG("IH: D2 vline\n");
8013
8014                                 break;
8015                         default:
8016                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8017                                 break;
8018                         }
8019                         break;
8020                 case 3: /* D3 vblank/vline */
8021                         switch (src_data) {
8022                         case 0: /* D3 vblank */
8023                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8024                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8025
8026                                 if (rdev->irq.crtc_vblank_int[2]) {
8027                                         drm_handle_vblank(rdev->ddev, 2);
8028                                         rdev->pm.vblank_sync = true;
8029                                         wake_up(&rdev->irq.vblank_queue);
8030                                 }
8031                                 if (atomic_read(&rdev->irq.pflip[2]))
8032                                         radeon_crtc_handle_vblank(rdev, 2);
8033                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8034                                 DRM_DEBUG("IH: D3 vblank\n");
8035
8036                                 break;
8037                         case 1: /* D3 vline */
8038                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8039                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8040
8041                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8042                                 DRM_DEBUG("IH: D3 vline\n");
8043
8044                                 break;
8045                         default:
8046                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8047                                 break;
8048                         }
8049                         break;
8050                 case 4: /* D4 vblank/vline */
8051                         switch (src_data) {
8052                         case 0: /* D4 vblank */
8053                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8054                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8055
8056                                 if (rdev->irq.crtc_vblank_int[3]) {
8057                                         drm_handle_vblank(rdev->ddev, 3);
8058                                         rdev->pm.vblank_sync = true;
8059                                         wake_up(&rdev->irq.vblank_queue);
8060                                 }
8061                                 if (atomic_read(&rdev->irq.pflip[3]))
8062                                         radeon_crtc_handle_vblank(rdev, 3);
8063                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8064                                 DRM_DEBUG("IH: D4 vblank\n");
8065
8066                                 break;
8067                         case 1: /* D4 vline */
8068                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8069                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8070
8071                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8072                                 DRM_DEBUG("IH: D4 vline\n");
8073
8074                                 break;
8075                         default:
8076                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077                                 break;
8078                         }
8079                         break;
8080                 case 5: /* D5 vblank/vline */
8081                         switch (src_data) {
8082                         case 0: /* D5 vblank */
8083                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8084                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8085
8086                                 if (rdev->irq.crtc_vblank_int[4]) {
8087                                         drm_handle_vblank(rdev->ddev, 4);
8088                                         rdev->pm.vblank_sync = true;
8089                                         wake_up(&rdev->irq.vblank_queue);
8090                                 }
8091                                 if (atomic_read(&rdev->irq.pflip[4]))
8092                                         radeon_crtc_handle_vblank(rdev, 4);
8093                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8094                                 DRM_DEBUG("IH: D5 vblank\n");
8095
8096                                 break;
8097                         case 1: /* D5 vline */
8098                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8099                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8100
8101                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8102                                 DRM_DEBUG("IH: D5 vline\n");
8103
8104                                 break;
8105                         default:
8106                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8107                                 break;
8108                         }
8109                         break;
8110                 case 6: /* D6 vblank/vline */
8111                         switch (src_data) {
8112                         case 0: /* D6 vblank */
8113                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8114                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8115
8116                                 if (rdev->irq.crtc_vblank_int[5]) {
8117                                         drm_handle_vblank(rdev->ddev, 5);
8118                                         rdev->pm.vblank_sync = true;
8119                                         wake_up(&rdev->irq.vblank_queue);
8120                                 }
8121                                 if (atomic_read(&rdev->irq.pflip[5]))
8122                                         radeon_crtc_handle_vblank(rdev, 5);
8123                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8124                                 DRM_DEBUG("IH: D6 vblank\n");
8125
8126                                 break;
8127                         case 1: /* D6 vline */
8128                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8129                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8130
8131                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8132                                 DRM_DEBUG("IH: D6 vline\n");
8133
8134                                 break;
8135                         default:
8136                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8137                                 break;
8138                         }
8139                         break;
8140                 case 8: /* D1 page flip */
8141                 case 10: /* D2 page flip */
8142                 case 12: /* D3 page flip */
8143                 case 14: /* D4 page flip */
8144                 case 16: /* D5 page flip */
8145                 case 18: /* D6 page flip */
8146                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8147                         if (radeon_use_pflipirq > 0)
8148                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8149                         break;
8150                 case 42: /* HPD hotplug */
8151                         switch (src_data) {
8152                         case 0:
8153                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8154                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8155
8156                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8157                                 queue_hotplug = true;
8158                                 DRM_DEBUG("IH: HPD1\n");
8159
8160                                 break;
8161                         case 1:
8162                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8163                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8164
8165                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8166                                 queue_hotplug = true;
8167                                 DRM_DEBUG("IH: HPD2\n");
8168
8169                                 break;
8170                         case 2:
8171                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8172                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8173
8174                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8175                                 queue_hotplug = true;
8176                                 DRM_DEBUG("IH: HPD3\n");
8177
8178                                 break;
8179                         case 3:
8180                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8181                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8182
8183                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8184                                 queue_hotplug = true;
8185                                 DRM_DEBUG("IH: HPD4\n");
8186
8187                                 break;
8188                         case 4:
8189                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8190                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8191
8192                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8193                                 queue_hotplug = true;
8194                                 DRM_DEBUG("IH: HPD5\n");
8195
8196                                 break;
8197                         case 5:
8198                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8199                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8200
8201                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8202                                 queue_hotplug = true;
8203                                 DRM_DEBUG("IH: HPD6\n");
8204
8205                                 break;
8206                         case 6:
8207                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8208                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8209
8210                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8211                                 queue_dp = true;
8212                                 DRM_DEBUG("IH: HPD_RX 1\n");
8213
8214                                 break;
8215                         case 7:
8216                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8217                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8218
8219                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8220                                 queue_dp = true;
8221                                 DRM_DEBUG("IH: HPD_RX 2\n");
8222
8223                                 break;
8224                         case 8:
8225                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8226                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8227
8228                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8229                                 queue_dp = true;
8230                                 DRM_DEBUG("IH: HPD_RX 3\n");
8231
8232                                 break;
8233                         case 9:
8234                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8235                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8236
8237                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8238                                 queue_dp = true;
8239                                 DRM_DEBUG("IH: HPD_RX 4\n");
8240
8241                                 break;
8242                         case 10:
8243                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8244                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8245
8246                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8247                                 queue_dp = true;
8248                                 DRM_DEBUG("IH: HPD_RX 5\n");
8249
8250                                 break;
8251                         case 11:
8252                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8253                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8254
8255                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8256                                 queue_dp = true;
8257                                 DRM_DEBUG("IH: HPD_RX 6\n");
8258
8259                                 break;
8260                         default:
8261                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8262                                 break;
8263                         }
8264                         break;
8265                 case 96:
8266                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8267                         WREG32(SRBM_INT_ACK, 0x1);
8268                         break;
8269                 case 124: /* UVD */
8270                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8271                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8272                         break;
8273                 case 146:
8274                 case 147:
8275                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8276                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8277                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8278                         /* reset addr and status */
8279                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8280                         if (addr == 0x0 && status == 0x0)
8281                                 break;
8282                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8283                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8284                                 addr);
8285                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8286                                 status);
8287                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8288                         break;
8289                 case 167: /* VCE */
8290                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8291                         switch (src_data) {
8292                         case 0:
8293                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8294                                 break;
8295                         case 1:
8296                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8297                                 break;
8298                         default:
8299                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8300                                 break;
8301                         }
8302                         break;
8303                 case 176: /* GFX RB CP_INT */
8304                 case 177: /* GFX IB CP_INT */
8305                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8306                         break;
8307                 case 181: /* CP EOP event */
8308                         DRM_DEBUG("IH: CP EOP\n");
8309                         /* XXX check the bitfield order! */
8310                         me_id = (ring_id & 0x60) >> 5;
8311                         pipe_id = (ring_id & 0x18) >> 3;
8312                         queue_id = (ring_id & 0x7) >> 0;
8313                         switch (me_id) {
8314                         case 0:
8315                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8316                                 break;
8317                         case 1:
8318                         case 2:
8319                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8320                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8321                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8322                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8323                                 break;
8324                         }
8325                         break;
8326                 case 184: /* CP Privileged reg access */
8327                         DRM_ERROR("Illegal register access in command stream\n");
8328                         /* XXX check the bitfield order! */
8329                         me_id = (ring_id & 0x60) >> 5;
8330                         pipe_id = (ring_id & 0x18) >> 3;
8331                         queue_id = (ring_id & 0x7) >> 0;
8332                         switch (me_id) {
8333                         case 0:
8334                                 /* This results in a full GPU reset, but all we need to do is soft
8335                                  * reset the CP for gfx
8336                                  */
8337                                 queue_reset = true;
8338                                 break;
8339                         case 1:
8340                                 /* XXX compute */
8341                                 queue_reset = true;
8342                                 break;
8343                         case 2:
8344                                 /* XXX compute */
8345                                 queue_reset = true;
8346                                 break;
8347                         }
8348                         break;
8349                 case 185: /* CP Privileged inst */
8350                         DRM_ERROR("Illegal instruction in command stream\n");
8351                         /* XXX check the bitfield order! */
8352                         me_id = (ring_id & 0x60) >> 5;
8353                         pipe_id = (ring_id & 0x18) >> 3;
8354                         queue_id = (ring_id & 0x7) >> 0;
8355                         switch (me_id) {
8356                         case 0:
8357                                 /* This results in a full GPU reset, but all we need to do is soft
8358                                  * reset the CP for gfx
8359                                  */
8360                                 queue_reset = true;
8361                                 break;
8362                         case 1:
8363                                 /* XXX compute */
8364                                 queue_reset = true;
8365                                 break;
8366                         case 2:
8367                                 /* XXX compute */
8368                                 queue_reset = true;
8369                                 break;
8370                         }
8371                         break;
8372                 case 224: /* SDMA trap event */
8373                         /* XXX check the bitfield order! */
8374                         me_id = (ring_id & 0x3) >> 0;
8375                         queue_id = (ring_id & 0xc) >> 2;
8376                         DRM_DEBUG("IH: SDMA trap\n");
8377                         switch (me_id) {
8378                         case 0:
8379                                 switch (queue_id) {
8380                                 case 0:
8381                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8382                                         break;
8383                                 case 1:
8384                                         /* XXX compute */
8385                                         break;
8386                                 case 2:
8387                                         /* XXX compute */
8388                                         break;
8389                                 }
8390                                 break;
8391                         case 1:
8392                                 switch (queue_id) {
8393                                 case 0:
8394                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8395                                         break;
8396                                 case 1:
8397                                         /* XXX compute */
8398                                         break;
8399                                 case 2:
8400                                         /* XXX compute */
8401                                         break;
8402                                 }
8403                                 break;
8404                         }
8405                         break;
8406                 case 230: /* thermal low to high */
8407                         DRM_DEBUG("IH: thermal low to high\n");
8408                         rdev->pm.dpm.thermal.high_to_low = false;
8409                         queue_thermal = true;
8410                         break;
8411                 case 231: /* thermal high to low */
8412                         DRM_DEBUG("IH: thermal high to low\n");
8413                         rdev->pm.dpm.thermal.high_to_low = true;
8414                         queue_thermal = true;
8415                         break;
8416                 case 233: /* GUI IDLE */
8417                         DRM_DEBUG("IH: GUI idle\n");
8418                         break;
8419                 case 241: /* SDMA Privileged inst */
8420                 case 247: /* SDMA Privileged inst */
8421                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8422                         /* XXX check the bitfield order! */
8423                         me_id = (ring_id & 0x3) >> 0;
8424                         queue_id = (ring_id & 0xc) >> 2;
8425                         switch (me_id) {
8426                         case 0:
8427                                 switch (queue_id) {
8428                                 case 0:
8429                                         queue_reset = true;
8430                                         break;
8431                                 case 1:
8432                                         /* XXX compute */
8433                                         queue_reset = true;
8434                                         break;
8435                                 case 2:
8436                                         /* XXX compute */
8437                                         queue_reset = true;
8438                                         break;
8439                                 }
8440                                 break;
8441                         case 1:
8442                                 switch (queue_id) {
8443                                 case 0:
8444                                         queue_reset = true;
8445                                         break;
8446                                 case 1:
8447                                         /* XXX compute */
8448                                         queue_reset = true;
8449                                         break;
8450                                 case 2:
8451                                         /* XXX compute */
8452                                         queue_reset = true;
8453                                         break;
8454                                 }
8455                                 break;
8456                         }
8457                         break;
8458                 default:
8459                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8460                         break;
8461                 }
8462
8463                 /* wptr/rptr are in bytes! */
8464                 rptr += 16;
8465                 rptr &= rdev->ih.ptr_mask;
8466                 WREG32(IH_RB_RPTR, rptr);
8467         }
8468         if (queue_dp)
8469                 schedule_work(&rdev->dp_work);
8470         if (queue_hotplug)
8471                 schedule_delayed_work(&rdev->hotplug_work, 0);
8472         if (queue_reset) {
8473                 rdev->needs_reset = true;
8474                 wake_up_all(&rdev->fence_queue);
8475         }
8476         if (queue_thermal)
8477                 schedule_work(&rdev->pm.dpm.thermal.work);
8478         rdev->ih.rptr = rptr;
8479         atomic_set(&rdev->ih.lock, 0);
8480
8481         /* make sure wptr hasn't changed while processing */
8482         wptr = cik_get_ih_wptr(rdev);
8483         if (wptr != rptr)
8484                 goto restart_ih;
8485
8486         return IRQ_HANDLED;
8487 }
8488
8489 /*
8490  * startup/shutdown callbacks
8491  */
8492 /**
8493  * cik_startup - program the asic to a functional state
8494  *
8495  * @rdev: radeon_device pointer
8496  *
8497  * Programs the asic to a functional state (CIK).
8498  * Called by cik_init() and cik_resume().
8499  * Returns 0 for success, error for failure.
8500  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* MC ucode is only loaded here for dGPUs when DPM is not enabled
	 * (presumably DPM takes care of it otherwise -- note(review): confirm)
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore list matching the APU family:
		 * Kaveri (spectre) vs. Kabini/Mullins (kalindi)
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence handling on the gfx, compute and SDMA rings;
	 * any failure here is fatal for startup
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failures are not fatal: the ring size is zeroed so the
	 * UVD ring init below is skipped
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* likewise, VCE failures just disable both VCE rings */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with legacy firmware pads rings with type-2 packets;
	 * new firmware (and all other CIK parts) use type-3 NOPs
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE rings are only initialized if resume above succeeded
	 * (ring_size was zeroed on failure)
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* start from -ENOENT so VCE init is silently skipped below when
	 * neither VCE ring is enabled
	 */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8746
8747 /**
8748  * cik_resume - resume the asic to a functional state
8749  *
8750  * @rdev: radeon_device pointer
8751  *
8752  * Programs the asic to a functional state (CIK).
8753  * Called at resume.
8754  * Returns 0 for success, error for failure.
8755  */
8756 int cik_resume(struct radeon_device *rdev)
8757 {
8758         int r;
8759
8760         /* post card */
8761         atom_asic_init(rdev->mode_info.atom_context);
8762
8763         /* init golden registers */
8764         cik_init_golden_registers(rdev);
8765
8766         if (rdev->pm.pm_method == PM_METHOD_DPM)
8767                 radeon_pm_resume(rdev);
8768
8769         rdev->accel_working = true;
8770         r = cik_startup(rdev);
8771         if (r) {
8772                 DRM_ERROR("cik startup failed on resume\n");
8773                 rdev->accel_working = false;
8774                 return r;
8775         }
8776
8777         return r;
8778
8779 }
8780
8781 /**
8782  * cik_suspend - suspend the asic
8783  *
8784  * @rdev: radeon_device pointer
8785  *
8786  * Bring the chip into a state suitable for suspend (CIK).
8787  * Called at suspend.
8788  * Returns 0 for success.
8789  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop consumers first: KFD, power management, audio, VM manager */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* quiesce UVD/VCE */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before disabling interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* finally disable writeback and the GART */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8808
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init() does little more
 * than call the asic-specific functions. This should also allow
 * the removal of a number of callbacks such as vram_info.
 */
8815 /**
8816  * cik_init - asic specific driver and hw init
8817  *
8818  * @rdev: radeon_device pointer
8819  *
8820  * Setup asic specific driver variables and program the hw
8821  * to a functional state (CIK).
8822  * Called at driver startup.
8823  * Returns 0 for success, errors for failure.
8824  */
8825 int cik_init(struct radeon_device *rdev)
8826 {
8827         struct radeon_ring *ring;
8828         int r;
8829
8830         /* Read BIOS */
8831         if (!radeon_get_bios(rdev)) {
8832                 if (ASIC_IS_AVIVO(rdev))
8833                         return -EINVAL;
8834         }
8835         /* Must be an ATOMBIOS */
8836         if (!rdev->is_atom_bios) {
8837                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8838                 return -EINVAL;
8839         }
8840         r = radeon_atombios_init(rdev);
8841         if (r)
8842                 return r;
8843
8844         /* Post card if necessary */
8845         if (!radeon_card_posted(rdev)) {
8846                 if (!rdev->bios) {
8847                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8848                         return -EINVAL;
8849                 }
8850                 DRM_INFO("GPU not posted. posting now...\n");
8851                 atom_asic_init(rdev->mode_info.atom_context);
8852         }
8853         /* init golden registers */
8854         cik_init_golden_registers(rdev);
8855         /* Initialize scratch registers */
8856         cik_scratch_init(rdev);
8857         /* Initialize surface registers */
8858         radeon_surface_init(rdev);
8859         /* Initialize clocks */
8860         radeon_get_clock_info(rdev->ddev);
8861
8862         /* Fence driver */
8863         r = radeon_fence_driver_init(rdev);
8864         if (r)
8865                 return r;
8866
8867         /* initialize memory controller */
8868         r = cik_mc_init(rdev);
8869         if (r)
8870                 return r;
8871         /* Memory manager */
8872         r = radeon_bo_init(rdev);
8873         if (r)
8874                 return r;
8875
8876         if (rdev->flags & RADEON_IS_IGP) {
8877                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8878                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8879                         r = cik_init_microcode(rdev);
8880                         if (r) {
8881                                 DRM_ERROR("Failed to load firmware!\n");
8882                                 return r;
8883                         }
8884                 }
8885         } else {
8886                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8887                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8888                     !rdev->mc_fw) {
8889                         r = cik_init_microcode(rdev);
8890                         if (r) {
8891                                 DRM_ERROR("Failed to load firmware!\n");
8892                                 return r;
8893                         }
8894                 }
8895         }
8896
8897         /* Initialize power management */
8898         radeon_pm_init(rdev);
8899
8900         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8901         ring->ring_obj = NULL;
8902         r600_ring_init(rdev, ring, 1024 * 1024);
8903
8904         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8905         ring->ring_obj = NULL;
8906         r600_ring_init(rdev, ring, 1024 * 1024);
8907         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8908         if (r)
8909                 return r;
8910
8911         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8912         ring->ring_obj = NULL;
8913         r600_ring_init(rdev, ring, 1024 * 1024);
8914         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8915         if (r)
8916                 return r;
8917
8918         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8919         ring->ring_obj = NULL;
8920         r600_ring_init(rdev, ring, 256 * 1024);
8921
8922         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8923         ring->ring_obj = NULL;
8924         r600_ring_init(rdev, ring, 256 * 1024);
8925
8926         r = radeon_uvd_init(rdev);
8927         if (!r) {
8928                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8929                 ring->ring_obj = NULL;
8930                 r600_ring_init(rdev, ring, 4096);
8931         }
8932
8933         r = radeon_vce_init(rdev);
8934         if (!r) {
8935                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8936                 ring->ring_obj = NULL;
8937                 r600_ring_init(rdev, ring, 4096);
8938
8939                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8940                 ring->ring_obj = NULL;
8941                 r600_ring_init(rdev, ring, 4096);
8942         }
8943
8944         rdev->ih.ring_obj = NULL;
8945         r600_ih_ring_init(rdev, 64 * 1024);
8946
8947         r = r600_pcie_gart_init(rdev);
8948         if (r)
8949                 return r;
8950
8951         rdev->accel_working = true;
8952         r = cik_startup(rdev);
8953         if (r) {
8954                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8955                 cik_cp_fini(rdev);
8956                 cik_sdma_fini(rdev);
8957                 cik_irq_fini(rdev);
8958                 sumo_rlc_fini(rdev);
8959                 cik_mec_fini(rdev);
8960                 radeon_wb_fini(rdev);
8961                 radeon_ib_pool_fini(rdev);
8962                 radeon_vm_manager_fini(rdev);
8963                 radeon_irq_kms_fini(rdev);
8964                 cik_pcie_gart_fini(rdev);
8965                 rdev->accel_working = false;
8966         }
8967
8968         /* Don't start up if the MC ucode is missing.
8969          * The default clocks and voltages before the MC ucode
8970          * is loaded are not suffient for advanced operations.
8971          */
8972         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8973                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8974                 return -EINVAL;
8975         }
8976
8977         return 0;
8978 }
8979
8980 /**
8981  * cik_fini - asic specific driver and hw fini
8982  *
8983  * @rdev: radeon_device pointer
8984  *
8985  * Tear down the asic specific driver variables and program the hw
8986  * to an idle state (CIK).
8987  * Called at driver unload.
8988  */
void cik_fini(struct radeon_device *rdev)
{
	/* stop power management first */
	radeon_pm_fini(rdev);
	/* tear down the engines brought up by cik_startup() */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* shut down the multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* release GART, scratch and memory-manager state */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* the BIOS copy was kmalloc'd by radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
9015
9016 void dce8_program_fmt(struct drm_encoder *encoder)
9017 {
9018         struct drm_device *dev = encoder->dev;
9019         struct radeon_device *rdev = dev->dev_private;
9020         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9021         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9022         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9023         int bpc = 0;
9024         u32 tmp = 0;
9025         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9026
9027         if (connector) {
9028                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9029                 bpc = radeon_get_monitor_bpc(connector);
9030                 dither = radeon_connector->dither;
9031         }
9032
9033         /* LVDS/eDP FMT is set up by atom */
9034         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9035                 return;
9036
9037         /* not needed for analog */
9038         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9039             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9040                 return;
9041
9042         if (bpc == 0)
9043                 return;
9044
9045         switch (bpc) {
9046         case 6:
9047                 if (dither == RADEON_FMT_DITHER_ENABLE)
9048                         /* XXX sort out optimal dither settings */
9049                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9050                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9051                 else
9052                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9053                 break;
9054         case 8:
9055                 if (dither == RADEON_FMT_DITHER_ENABLE)
9056                         /* XXX sort out optimal dither settings */
9057                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9058                                 FMT_RGB_RANDOM_ENABLE |
9059                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9060                 else
9061                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9062                 break;
9063         case 10:
9064                 if (dither == RADEON_FMT_DITHER_ENABLE)
9065                         /* XXX sort out optimal dither settings */
9066                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9067                                 FMT_RGB_RANDOM_ENABLE |
9068                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9069                 else
9070                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9071                 break;
9072         default:
9073                 /* not needed */
9074                 break;
9075         }
9076
9077         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9078 }
9079
9080 /* display watermark setup */
9081 /**
9082  * dce8_line_buffer_adjust - Set up the line buffer
9083  *
9084  * @rdev: radeon_device pointer
9085  * @radeon_crtc: the selected display controller
9086  * @mode: the current display mode on the selected display
9087  * controller
9088  *
9089  * Setup up the line buffer allocation for
9090  * the selected display controller (CIK).
9091  * Returns the line buffer size in pixels.
9092  */
9093 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9094                                    struct radeon_crtc *radeon_crtc,
9095                                    struct drm_display_mode *mode)
9096 {
9097         u32 tmp, buffer_alloc, i;
9098         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9099         /*
9100          * Line Buffer Setup
9101          * There are 6 line buffers, one for each display controllers.
9102          * There are 3 partitions per LB. Select the number of partitions
9103          * to enable based on the display width.  For display widths larger
9104          * than 4096, you need use to use 2 display controllers and combine
9105          * them using the stereo blender.
9106          */
9107         if (radeon_crtc->base.enabled && mode) {
9108                 if (mode->crtc_hdisplay < 1920) {
9109                         tmp = 1;
9110                         buffer_alloc = 2;
9111                 } else if (mode->crtc_hdisplay < 2560) {
9112                         tmp = 2;
9113                         buffer_alloc = 2;
9114                 } else if (mode->crtc_hdisplay < 4096) {
9115                         tmp = 0;
9116                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9117                 } else {
9118                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9119                         tmp = 0;
9120                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9121                 }
9122         } else {
9123                 tmp = 1;
9124                 buffer_alloc = 0;
9125         }
9126
9127         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9128                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9129
9130         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9131                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9132         for (i = 0; i < rdev->usec_timeout; i++) {
9133                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9134                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9135                         break;
9136                 udelay(1);
9137         }
9138
9139         if (radeon_crtc->base.enabled && mode) {
9140                 switch (tmp) {
9141                 case 0:
9142                 default:
9143                         return 4096 * 2;
9144                 case 1:
9145                         return 1920 * 2;
9146                 case 2:
9147                         return 2560 * 2;
9148                 }
9149         }
9150
9151         /* controller not enabled, so no lb used */
9152         return 0;
9153 }
9154
9155 /**
9156  * cik_get_number_of_dram_channels - get the number of dram channels
9157  *
9158  * @rdev: radeon_device pointer
9159  *
9160  * Look up the number of video ram channels (CIK).
9161  * Used for display watermark bandwidth calculations
9162  * Returns the number of dram channels
9163  */
9164 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9165 {
9166         u32 tmp = RREG32(MC_SHARED_CHMAP);
9167
9168         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9169         case 0:
9170         default:
9171                 return 1;
9172         case 1:
9173                 return 2;
9174         case 2:
9175                 return 4;
9176         case 3:
9177                 return 8;
9178         case 4:
9179                 return 3;
9180         case 5:
9181                 return 6;
9182         case 6:
9183                 return 10;
9184         case 7:
9185                 return 12;
9186         case 8:
9187                 return 16;
9188         }
9189 }
9190
/* dce8_wm_params - per-head inputs for the DCE8 display watermark
 * calculations; filled in by dce8_program_watermarks().
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9206
9207 /**
9208  * dce8_dram_bandwidth - get the dram bandwidth
9209  *
9210  * @wm: watermark calculation data
9211  *
9212  * Calculate the raw dram bandwidth (CIK).
9213  * Used for display watermark bandwidth calculations
9214  * Returns the dram bandwidth in MBytes/s
9215  */
9216 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9217 {
9218         /* Calculate raw DRAM Bandwidth */
9219         fixed20_12 dram_efficiency; /* 0.7 */
9220         fixed20_12 yclk, dram_channels, bandwidth;
9221         fixed20_12 a;
9222
9223         a.full = dfixed_const(1000);
9224         yclk.full = dfixed_const(wm->yclk);
9225         yclk.full = dfixed_div(yclk, a);
9226         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9227         a.full = dfixed_const(10);
9228         dram_efficiency.full = dfixed_const(7);
9229         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9230         bandwidth.full = dfixed_mul(dram_channels, yclk);
9231         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9232
9233         return dfixed_trunc(bandwidth);
9234 }
9235
9236 /**
9237  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9238  *
9239  * @wm: watermark calculation data
9240  *
9241  * Calculate the dram bandwidth used for display (CIK).
9242  * Used for display watermark bandwidth calculations
9243  * Returns the dram bandwidth for display in MBytes/s
9244  */
9245 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9246 {
9247         /* Calculate DRAM Bandwidth and the part allocated to display. */
9248         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9249         fixed20_12 yclk, dram_channels, bandwidth;
9250         fixed20_12 a;
9251
9252         a.full = dfixed_const(1000);
9253         yclk.full = dfixed_const(wm->yclk);
9254         yclk.full = dfixed_div(yclk, a);
9255         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9256         a.full = dfixed_const(10);
9257         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9258         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9259         bandwidth.full = dfixed_mul(dram_channels, yclk);
9260         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9261
9262         return dfixed_trunc(bandwidth);
9263 }
9264
9265 /**
9266  * dce8_data_return_bandwidth - get the data return bandwidth
9267  *
9268  * @wm: watermark calculation data
9269  *
9270  * Calculate the data return bandwidth used for display (CIK).
9271  * Used for display watermark bandwidth calculations
9272  * Returns the data return bandwidth in MBytes/s
9273  */
9274 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9275 {
9276         /* Calculate the display Data return Bandwidth */
9277         fixed20_12 return_efficiency; /* 0.8 */
9278         fixed20_12 sclk, bandwidth;
9279         fixed20_12 a;
9280
9281         a.full = dfixed_const(1000);
9282         sclk.full = dfixed_const(wm->sclk);
9283         sclk.full = dfixed_div(sclk, a);
9284         a.full = dfixed_const(10);
9285         return_efficiency.full = dfixed_const(8);
9286         return_efficiency.full = dfixed_div(return_efficiency, a);
9287         a.full = dfixed_const(32);
9288         bandwidth.full = dfixed_mul(a, sclk);
9289         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9290
9291         return dfixed_trunc(bandwidth);
9292 }
9293
9294 /**
9295  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9296  *
9297  * @wm: watermark calculation data
9298  *
9299  * Calculate the dmif bandwidth used for display (CIK).
9300  * Used for display watermark bandwidth calculations
9301  * Returns the dmif bandwidth in MBytes/s
9302  */
9303 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9304 {
9305         /* Calculate the DMIF Request Bandwidth */
9306         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9307         fixed20_12 disp_clk, bandwidth;
9308         fixed20_12 a, b;
9309
9310         a.full = dfixed_const(1000);
9311         disp_clk.full = dfixed_const(wm->disp_clk);
9312         disp_clk.full = dfixed_div(disp_clk, a);
9313         a.full = dfixed_const(32);
9314         b.full = dfixed_mul(a, disp_clk);
9315
9316         a.full = dfixed_const(10);
9317         disp_clk_request_efficiency.full = dfixed_const(8);
9318         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9319
9320         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9321
9322         return dfixed_trunc(bandwidth);
9323 }
9324
9325 /**
9326  * dce8_available_bandwidth - get the min available bandwidth
9327  *
9328  * @wm: watermark calculation data
9329  *
9330  * Calculate the min available bandwidth used for display (CIK).
9331  * Used for display watermark bandwidth calculations
9332  * Returns the min available bandwidth in MBytes/s
9333  */
9334 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9335 {
9336         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9337         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9338         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9339         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9340
9341         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9342 }
9343
9344 /**
9345  * dce8_average_bandwidth - get the average available bandwidth
9346  *
9347  * @wm: watermark calculation data
9348  *
9349  * Calculate the average available bandwidth used for display (CIK).
9350  * Used for display watermark bandwidth calculations
9351  * Returns the average available bandwidth in MBytes/s
9352  */
9353 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9354 {
9355         /* Calculate the display mode Average Bandwidth
9356          * DisplayMode should contain the source and destination dimensions,
9357          * timing, etc.
9358          */
9359         fixed20_12 bpp;
9360         fixed20_12 line_time;
9361         fixed20_12 src_width;
9362         fixed20_12 bandwidth;
9363         fixed20_12 a;
9364
9365         a.full = dfixed_const(1000);
9366         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9367         line_time.full = dfixed_div(line_time, a);
9368         bpp.full = dfixed_const(wm->bytes_per_pixel);
9369         src_width.full = dfixed_const(wm->src_width);
9370         bandwidth.full = dfixed_mul(src_width, bpp);
9371         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9372         bandwidth.full = dfixed_div(bandwidth, line_time);
9373
9374         return dfixed_trunc(bandwidth);
9375 }
9376
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time to return a worst-case 512-byte x8 chunk at the available rate */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* other heads contend for the same memory path */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide (also avoids div-by-zero below) */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling / many vertical taps / interlaced modes need more
	 * source lines fetched per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = rate the DMIF buffer (dmif_size bytes) can cover the
	 * mc_latency + 512-clock window at disp_clk
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* display-clock-limited fill rate: disp_clk(MHz) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill one line-buffer line at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be filled within the active period, the excess
	 * adds to the latency that must be hidden
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9448
9449 /**
9450  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9451  * average and available dram bandwidth
9452  *
9453  * @wm: watermark calculation data
9454  *
9455  * Check if the display average bandwidth fits in the display
9456  * dram bandwidth (CIK).
9457  * Used for display watermark bandwidth calculations
9458  * Returns true if the display fits, false if not.
9459  */
9460 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9461 {
9462         if (dce8_average_bandwidth(wm) <=
9463             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9464                 return true;
9465         else
9466                 return false;
9467 }
9468
9469 /**
9470  * dce8_average_bandwidth_vs_available_bandwidth - check
9471  * average and available bandwidth
9472  *
9473  * @wm: watermark calculation data
9474  *
9475  * Check if the display average bandwidth fits in the display
9476  * available bandwidth (CIK).
9477  * Used for display watermark bandwidth calculations
9478  * Returns true if the display fits, false if not.
9479  */
9480 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9481 {
9482         if (dce8_average_bandwidth(wm) <=
9483             (dce8_available_bandwidth(wm) / wm->num_heads))
9484                 return true;
9485         else
9486                 return false;
9487 }
9488
9489 /**
9490  * dce8_check_latency_hiding - check latency hiding
9491  *
9492  * @wm: watermark calculation data
9493  *
9494  * Check latency hiding (CIK).
9495  * Used for display watermark bandwidth calculations
9496  * Returns true if the display fits, false if not.
9497  */
9498 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9499 {
9500         u32 lb_partitions = wm->lb_size / wm->src_width;
9501         u32 line_time = wm->active_time + wm->blank_time;
9502         u32 latency_tolerant_lines;
9503         u32 latency_hiding;
9504         fixed20_12 a;
9505
9506         a.full = dfixed_const(1);
9507         if (wm->vsc.full > a.full)
9508                 latency_tolerant_lines = 1;
9509         else {
9510                 if (lb_partitions <= (wm->vtaps + 1))
9511                         latency_tolerant_lines = 1;
9512                 else
9513                         latency_tolerant_lines = 2;
9514         }
9515
9516         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9517
9518         if (dce8_latency_watermark(wm) <= latency_hiding)
9519                 return true;
9520         else
9521                 return false;
9522 }
9523
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line_time capped to the 16-bit register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9659
9660 /**
9661  * dce8_bandwidth_update - program display watermarks
9662  *
9663  * @rdev: radeon_device pointer
9664  *
9665  * Calculate and program the display watermarks and line
9666  * buffer allocation (CIK).
9667  */
9668 void dce8_bandwidth_update(struct radeon_device *rdev)
9669 {
9670         struct drm_display_mode *mode = NULL;
9671         u32 num_heads = 0, lb_size;
9672         int i;
9673
9674         if (!rdev->mode_info.mode_config_initialized)
9675                 return;
9676
9677         radeon_update_display_priority(rdev);
9678
9679         for (i = 0; i < rdev->num_crtc; i++) {
9680                 if (rdev->mode_info.crtcs[i]->base.enabled)
9681                         num_heads++;
9682         }
9683         for (i = 0; i < rdev->num_crtc; i++) {
9684                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9685                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9686                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9687         }
9688 }
9689
9690 /**
9691  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9692  *
9693  * @rdev: radeon_device pointer
9694  *
9695  * Fetches a GPU clock counter snapshot (SI).
9696  * Returns the 64 bit clock counter snapshot.
9697  */
9698 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9699 {
9700         uint64_t clock;
9701
9702         mutex_lock(&rdev->gpu_clock_mutex);
9703         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9704         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9705                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9706         mutex_unlock(&rdev->gpu_clock_mutex);
9707         return clock;
9708 }
9709
9710 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9711                               u32 cntl_reg, u32 status_reg)
9712 {
9713         int r, i;
9714         struct atom_clock_dividers dividers;
9715         uint32_t tmp;
9716
9717         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9718                                            clock, false, &dividers);
9719         if (r)
9720                 return r;
9721
9722         tmp = RREG32_SMC(cntl_reg);
9723         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9724         tmp |= dividers.post_divider;
9725         WREG32_SMC(cntl_reg, tmp);
9726
9727         for (i = 0; i < 100; i++) {
9728                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9729                         break;
9730                 mdelay(10);
9731         }
9732         if (i == 100)
9733                 return -ETIMEDOUT;
9734
9735         return 0;
9736 }
9737
9738 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9739 {
9740         int r = 0;
9741
9742         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9743         if (r)
9744                 return r;
9745
9746         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9747         return r;
9748 }
9749
9750 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9751 {
9752         int r, i;
9753         struct atom_clock_dividers dividers;
9754         u32 tmp;
9755
9756         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9757                                            ecclk, false, &dividers);
9758         if (r)
9759                 return r;
9760
9761         for (i = 0; i < 100; i++) {
9762                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9763                         break;
9764                 mdelay(10);
9765         }
9766         if (i == 100)
9767                 return -ETIMEDOUT;
9768
9769         tmp = RREG32_SMC(CG_ECLK_CNTL);
9770         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9771         tmp |= dividers.post_divider;
9772         WREG32_SMC(CG_ECLK_CNTL, tmp);
9773
9774         for (i = 0; i < 100; i++) {
9775                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9776                         break;
9777                 mdelay(10);
9778         }
9779         if (i == 100)
9780                 return -ETIMEDOUT;
9781
9782         return 0;
9783 }
9784
/* Attempt to bring the PCIe link up to gen2/gen3 speeds.
 * Reads the platform's speed capability, optionally re-runs gen3
 * equalization against the upstream bridge, then requests a link
 * speed change and waits for the hardware to complete it.
 * Silently returns if the link cannot or need not be retrained.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* no upstream bridge to train against */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* user disabled pcie gen2/3 via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* platform caps out at gen1; nothing to do */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save link control on both ends, then set HAWD
			 * (hardware autonomous width disable) during training
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the widest detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot link control/control2 on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release quiescence */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2[3:0] */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9944
/**
 * cik_program_aspm - program PCIE ASPM (CIK asics)
 * @rdev: radeon_device pointer
 *
 * Configures PCIE Active State Power Management: the L0s/L1 inactivity
 * timers, PLL power-down while in L1, and CLKREQ#-based clock switching.
 * Skipped entirely when ASPM is disabled via the radeon_aspm module
 * parameter, on IGP parts, or on non-PCIE asics.  Every register is
 * updated read-modify-write and only written back when the value
 * actually changed (orig != data).
 *
 * The four disable_* locals are hard-coded tuning knobs: with all of
 * them false, L0s, L1, PLL-off-in-L1 and CLKREQ support are all
 * eligible to be enabled.
 */
9945 static void cik_program_aspm(struct radeon_device *rdev)
9946 {
9947         u32 data, orig;
9948         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9949         bool disable_clkreq = false;
9950
             /* radeon_aspm=0 means the user disabled ASPM on the command line */
9951         if (radeon_aspm == 0)
9952                 return;
9953
9954         /* XXX double check IGPs */
9955         if (rdev->flags & RADEON_IS_IGP)
9956                 return;
9957
9958         if (!(rdev->flags & RADEON_IS_PCIE))
9959                 return;
9960
             /* override the transmitted N_FTS (fast training sequence) count;
              * presumably 0x24 is the value validated for CIK — TODO confirm
              * against the register documentation */
9961         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9962         data &= ~LC_XMIT_N_FTS_MASK;
9963         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9964         if (orig != data)
9965                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9966
             /* NOTE(review): bit names suggest these let the link controller
              * enter recovery and ignore EDB (EnD Bad) errors; semantics not
              * verifiable from this file */
9967         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9968         data |= LC_GO_TO_RECOVERY;
9969         if (orig != data)
9970                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9971
9972         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9973         data |= P_IGNORE_EDB_ERR;
9974         if (orig != data)
9975                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9976
             /* start from cleared L0s/L1 inactivity timers, PMI-to-L1 blocked;
              * the branches below re-enable what the knobs allow */
9977         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9978         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9979         data |= LC_PMI_TO_L1_DIS;
9980         if (!disable_l0s)
9981                 data |= LC_L0S_INACTIVITY(7);
9982
9983         if (!disable_l1) {
                     /* enable L1: set its inactivity timer and allow PMI to L1 */
9984                 data |= LC_L1_INACTIVITY(7);
9985                 data &= ~LC_PMI_TO_L1_DIS;
9986                 if (orig != data)
9987                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9988
9989                 if (!disable_plloff_in_l1) {
9990                         bool clk_req_support;
9991
                             /* program both PIF blocks (PB0/PB1) to power the
                              * PLLs down in the OFF and TXS2 link states */
9992                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9993                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9994                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9995                         if (orig != data)
9996                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9997
9998                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9999                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10000                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10001                         if (orig != data)
10002                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
10003
10004                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
10005                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
10006                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
10007                         if (orig != data)
10008                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
10009
10010                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
10011                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10012                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10013                         if (orig != data)
10014                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
10015
10016                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
10017                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
10018                         data |= LC_DYN_LANES_PWR_STATE(3);
10019                         if (orig != data)
10020                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
10021
                             /* CLKREQ# support is only usable if we have an
                              * upstream bridge (not a root bus) whose link
                              * capability advertises Clock Power Management */
10022                         if (!disable_clkreq &&
10023                             !pci_is_root_bus(rdev->pdev->bus)) {
10024                                 struct pci_dev *root = rdev->pdev->bus->self;
10025                                 u32 lnkcap;
10026
10027                                 clk_req_support = false;
10028                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10029                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10030                                         clk_req_support = true;
10031                         } else {
10032                                 clk_req_support = false;
10033                         }
10034
10035                         if (clk_req_support) {
10036                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10037                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10038                                 if (orig != data)
10039                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10040
                                     /* reroute thermal monitor, deep sleep, zclk
                                      * and BIF clocks to alternate sources —
                                      * presumably so they keep running while the
                                      * PCIE refclk is gated via CLKREQ#; TODO
                                      * confirm against SMC clock documentation */
10041                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
10042                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10043                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10044                                 if (orig != data)
10045                                         WREG32_SMC(THM_CLK_CNTL, data);
10046
10047                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
10048                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10049                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10050                                 if (orig != data)
10051                                         WREG32_SMC(MISC_CLK_CTRL, data);
10052
10053                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10054                                 data &= ~BCLK_AS_XCLK;
10055                                 if (orig != data)
10056                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
10057
10058                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10059                                 data &= ~FORCE_BIF_REFCLK_EN;
10060                                 if (orig != data)
10061                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10062
10063                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10064                                 data &= ~MPLL_CLKOUT_SEL_MASK;
10065                                 data |= MPLL_CLKOUT_SEL(4);
10066                                 if (orig != data)
10067                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10068                         }
10069                 }
10070         } else {
                     /* L1 disabled: commit the LC_CNTL value built above
                      * (L0s timer, if enabled, plus PMI_TO_L1_DIS).  Dead with
                      * the current hard-coded disable_l1 = false. */
10071                 if (orig != data)
10072                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10073         }
10074
             /* enable memory light sleep for the slave/master/replay memories */
10075         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10076         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10077         if (orig != data)
10078                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
10079
10080         if (!disable_l0s) {
10081                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
10082                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10083                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10084                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                     /* link is lane-reversed in both directions:
                                      * back L0s out by clearing its inactivity
                                      * timer (presumably L0s is unreliable in
                                      * this configuration — TODO confirm) */
10085                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10086                                 data &= ~LC_L0S_INACTIVITY_MASK;
10087                                 if (orig != data)
10088                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10089                         }
10090                 }
10091         }
10092 }