Kernel bump from 4.1.3-rt to 4.1.7-rt.
[kvmfornfv.git] / kernel / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
/*
 * Firmware blobs for the CIK-family ASICs.  Each ASIC is listed under
 * two naming schemes: the legacy UPPERCASE blob names and the newer
 * lowercase names.  NOTE(review): which scheme is preferred and how
 * fallback between them works is handled by the microcode-init code,
 * not visible in this chunk — confirm in *_init_microcode().
 */
/* Bonaire — legacy firmware names */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire — new firmware names */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

/* Hawaii — legacy firmware names */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii — new firmware names */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

/* Kaveri (APU, no MC/SMC blobs) — legacy firmware names */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri — new firmware names (adds a second MEC blob) */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini — legacy firmware names */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini — new firmware names */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins — legacy firmware names */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins — new firmware names */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
/* Helpers implemented in other radeon ASIC files (r600, evergreen,
 * sumo, si, cik_sdma, vce) that the CIK code reuses. */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
/* Forward declaration for a static helper defined later in this file. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
/* Forward declarations for static helpers defined later in this file. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
176
177 /* get temperature in millidegrees */
178 int ci_get_temp(struct radeon_device *rdev)
179 {
180         u32 temp;
181         int actual_temp = 0;
182
183         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
184                 CTF_TEMP_SHIFT;
185
186         if (temp & 0x200)
187                 actual_temp = 255;
188         else
189                 actual_temp = temp & 0x1ff;
190
191         actual_temp = actual_temp * 1000;
192
193         return actual_temp;
194 }
195
196 /* get temperature in millidegrees */
197 int kv_get_temp(struct radeon_device *rdev)
198 {
199         u32 temp;
200         int actual_temp = 0;
201
202         temp = RREG32_SMC(0xC0300E0C);
203
204         if (temp)
205                 actual_temp = (temp / 8) - 49;
206         else
207                 actual_temp = 0;
208
209         actual_temp = actual_temp * 1000;
210
211         return actual_temp;
212 }
213
214 /*
215  * Indirect registers accessor
216  */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset
 *
 * Selects the register by writing PCIE_INDEX, reads the index back
 * (presumably to make sure the index write has landed before the data
 * access — confirm against the PCIE port programming sequence), then
 * reads the value from PCIE_DATA.  The whole index/data sequence is
 * serialized under pciep_idx_lock so concurrent accessors cannot
 * interleave.  Returns the register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
229
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset
 * @v: value to write
 *
 * Selects the register by writing PCIE_INDEX, then writes the value to
 * PCIE_DATA.  Each write is followed by a read-back of the same
 * register (presumably to post the write before the next access —
 * confirm against the PCIE port programming sequence).  The whole
 * sequence is serialized under pciep_idx_lock.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
241
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 * Entries come in pairs: a packed descriptor
 * ((select << 16) | (register byte offset >> 2)) followed by a
 * 0x00000000 placeholder for the saved value.
 * NOTE(review): the encoding of the high half-word and of the bare
 * 0x3 / 0x5 marker entries below is defined by the RLC setup
 * code/microcode that consumes this table, which is not visible in
 * this chunk — confirm against the RLC programming code before
 * changing anything here.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* section marker — meaning defined by the RLC consumer */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* section marker — the following descriptors carry no value
	 * placeholders */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
688
689 static const u32 kalindi_rlc_save_restore_register_list[] =
690 {
691         (0x0e00 << 16) | (0xc12c >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc140 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc150 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0xc15c >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0xc168 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0xc170 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xc204 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0xc2b4 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0xc2b8 >> 2),
708         0x00000000,
709         (0x0e00 << 16) | (0xc2bc >> 2),
710         0x00000000,
711         (0x0e00 << 16) | (0xc2c0 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0x8228 >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0x829c >> 2),
716         0x00000000,
717         (0x0e00 << 16) | (0x869c >> 2),
718         0x00000000,
719         (0x0600 << 16) | (0x98f4 >> 2),
720         0x00000000,
721         (0x0e00 << 16) | (0x98f8 >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0x9900 >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc260 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0x90e8 >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0x3c000 >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0x3c00c >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0x8c1c >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0x9700 >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0xcd20 >> 2),
738         0x00000000,
739         (0x4e00 << 16) | (0xcd20 >> 2),
740         0x00000000,
741         (0x5e00 << 16) | (0xcd20 >> 2),
742         0x00000000,
743         (0x6e00 << 16) | (0xcd20 >> 2),
744         0x00000000,
745         (0x7e00 << 16) | (0xcd20 >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0x89bc >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0x8900 >> 2),
750         0x00000000,
751         0x3,
752         (0x0e00 << 16) | (0xc130 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0xc134 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0xc1fc >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0xc208 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0xc264 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xc268 >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xc26c >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xc270 >> 2),
767         0x00000000,
768         (0x0e00 << 16) | (0xc274 >> 2),
769         0x00000000,
770         (0x0e00 << 16) | (0xc28c >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0xc290 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0xc294 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0xc298 >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0xc2a0 >> 2),
779         0x00000000,
780         (0x0e00 << 16) | (0xc2a4 >> 2),
781         0x00000000,
782         (0x0e00 << 16) | (0xc2a8 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0xc2ac >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0x301d0 >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0x30238 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0x30250 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0x30254 >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0x30258 >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0x3025c >> 2),
797         0x00000000,
798         (0x4e00 << 16) | (0xc900 >> 2),
799         0x00000000,
800         (0x5e00 << 16) | (0xc900 >> 2),
801         0x00000000,
802         (0x6e00 << 16) | (0xc900 >> 2),
803         0x00000000,
804         (0x7e00 << 16) | (0xc900 >> 2),
805         0x00000000,
806         (0x4e00 << 16) | (0xc904 >> 2),
807         0x00000000,
808         (0x5e00 << 16) | (0xc904 >> 2),
809         0x00000000,
810         (0x6e00 << 16) | (0xc904 >> 2),
811         0x00000000,
812         (0x7e00 << 16) | (0xc904 >> 2),
813         0x00000000,
814         (0x4e00 << 16) | (0xc908 >> 2),
815         0x00000000,
816         (0x5e00 << 16) | (0xc908 >> 2),
817         0x00000000,
818         (0x6e00 << 16) | (0xc908 >> 2),
819         0x00000000,
820         (0x7e00 << 16) | (0xc908 >> 2),
821         0x00000000,
822         (0x4e00 << 16) | (0xc90c >> 2),
823         0x00000000,
824         (0x5e00 << 16) | (0xc90c >> 2),
825         0x00000000,
826         (0x6e00 << 16) | (0xc90c >> 2),
827         0x00000000,
828         (0x7e00 << 16) | (0xc90c >> 2),
829         0x00000000,
830         (0x4e00 << 16) | (0xc910 >> 2),
831         0x00000000,
832         (0x5e00 << 16) | (0xc910 >> 2),
833         0x00000000,
834         (0x6e00 << 16) | (0xc910 >> 2),
835         0x00000000,
836         (0x7e00 << 16) | (0xc910 >> 2),
837         0x00000000,
838         (0x0e00 << 16) | (0xc99c >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0x9834 >> 2),
841         0x00000000,
842         (0x0000 << 16) | (0x30f00 >> 2),
843         0x00000000,
844         (0x0000 << 16) | (0x30f04 >> 2),
845         0x00000000,
846         (0x0000 << 16) | (0x30f08 >> 2),
847         0x00000000,
848         (0x0000 << 16) | (0x30f0c >> 2),
849         0x00000000,
850         (0x0600 << 16) | (0x9b7c >> 2),
851         0x00000000,
852         (0x0e00 << 16) | (0x8a14 >> 2),
853         0x00000000,
854         (0x0e00 << 16) | (0x8a18 >> 2),
855         0x00000000,
856         (0x0600 << 16) | (0x30a00 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x8bf0 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8bcc >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8b24 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x30a04 >> 2),
865         0x00000000,
866         (0x0600 << 16) | (0x30a10 >> 2),
867         0x00000000,
868         (0x0600 << 16) | (0x30a14 >> 2),
869         0x00000000,
870         (0x0600 << 16) | (0x30a18 >> 2),
871         0x00000000,
872         (0x0600 << 16) | (0x30a2c >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0xc700 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0xc704 >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0xc708 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0xc768 >> 2),
881         0x00000000,
882         (0x0400 << 16) | (0xc770 >> 2),
883         0x00000000,
884         (0x0400 << 16) | (0xc774 >> 2),
885         0x00000000,
886         (0x0400 << 16) | (0xc798 >> 2),
887         0x00000000,
888         (0x0400 << 16) | (0xc79c >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0x9100 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0x3c010 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0x8c00 >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0x8c04 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0x8c20 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0x8c38 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0x8c3c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0xae00 >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x9604 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0xac08 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0xac0c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0xac10 >> 2),
913         0x00000000,
914         (0x0e00 << 16) | (0xac14 >> 2),
915         0x00000000,
916         (0x0e00 << 16) | (0xac58 >> 2),
917         0x00000000,
918         (0x0e00 << 16) | (0xac68 >> 2),
919         0x00000000,
920         (0x0e00 << 16) | (0xac6c >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xac70 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xac74 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0xac78 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0xac7c >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0xac80 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0xac84 >> 2),
933         0x00000000,
934         (0x0e00 << 16) | (0xac88 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0xac8c >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0x970c >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0x9714 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0x9718 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0x971c >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x31068 >> 2),
947         0x00000000,
948         (0x4e00 << 16) | (0x31068 >> 2),
949         0x00000000,
950         (0x5e00 << 16) | (0x31068 >> 2),
951         0x00000000,
952         (0x6e00 << 16) | (0x31068 >> 2),
953         0x00000000,
954         (0x7e00 << 16) | (0x31068 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0xcd10 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0xcd14 >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x88b0 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x88b4 >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x88b8 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x88bc >> 2),
967         0x00000000,
968         (0x0400 << 16) | (0x89c0 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x88c4 >> 2),
971         0x00000000,
972         (0x0e00 << 16) | (0x88c8 >> 2),
973         0x00000000,
974         (0x0e00 << 16) | (0x88d0 >> 2),
975         0x00000000,
976         (0x0e00 << 16) | (0x88d4 >> 2),
977         0x00000000,
978         (0x0e00 << 16) | (0x88d8 >> 2),
979         0x00000000,
980         (0x0e00 << 16) | (0x8980 >> 2),
981         0x00000000,
982         (0x0e00 << 16) | (0x30938 >> 2),
983         0x00000000,
984         (0x0e00 << 16) | (0x3093c >> 2),
985         0x00000000,
986         (0x0e00 << 16) | (0x30940 >> 2),
987         0x00000000,
988         (0x0e00 << 16) | (0x89a0 >> 2),
989         0x00000000,
990         (0x0e00 << 16) | (0x30900 >> 2),
991         0x00000000,
992         (0x0e00 << 16) | (0x30904 >> 2),
993         0x00000000,
994         (0x0e00 << 16) | (0x89b4 >> 2),
995         0x00000000,
996         (0x0e00 << 16) | (0x3e1fc >> 2),
997         0x00000000,
998         (0x0e00 << 16) | (0x3c210 >> 2),
999         0x00000000,
1000         (0x0e00 << 16) | (0x3c214 >> 2),
1001         0x00000000,
1002         (0x0e00 << 16) | (0x3c218 >> 2),
1003         0x00000000,
1004         (0x0e00 << 16) | (0x8904 >> 2),
1005         0x00000000,
1006         0x5,
1007         (0x0e00 << 16) | (0x8c28 >> 2),
1008         (0x0e00 << 16) | (0x8c2c >> 2),
1009         (0x0e00 << 16) | (0x8c30 >> 2),
1010         (0x0e00 << 16) | (0x8c34 >> 2),
1011         (0x0e00 << 16) | (0x9600 >> 2),
1012 };
1013
/* Bonaire golden SPM register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1018
/* Bonaire golden common register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1026
/* Bonaire golden register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
1071
/* Bonaire MGCG/CGCG (clockgating) init sequence, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1157
/* Spectre (Kaveri) golden SPM register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1162
/* Spectre (Kaveri) golden common register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1170
/* Spectre (Kaveri) golden register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1199
/* Spectre (Kaveri) MGCG/CGCG (clockgating) init sequence, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1290
/* Kalindi (Kabini/Mullins) golden SPM register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1295
/* Kalindi (Kabini/Mullins) golden common register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1303
/* Kalindi (Kabini) golden register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1337
/* Kalindi (Kabini/Mullins) MGCG/CGCG (clockgating) init sequence, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1396
/* Hawaii golden SPM register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1401
/* Hawaii golden common register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};
1410
/* Hawaii golden register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};
1450
/* Hawaii MGCG/CGCG (clockgating) init sequence, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1561
/* Godavari (Mullins) golden register settings, fed to
 * radeon_program_register_sequence() by cik_init_golden_registers();
 * presumably {offset, and-mask, or-value} triples — confirm against that
 * helper's implementation.
 *
 * NOTE(review): the 0x98302 offset below is not dword-aligned and breaks
 * the 0x9830/0x9834 pattern seen in the sibling tables — possibly a typo
 * for 0x9834; verify against the AMD CIK register spec before changing.
 */
static const u32 godavari_golden_registers[] =
{
        0x55e4, 0xff607fff, 0xfc000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x98302, 0xf00fffff, 0x00000400,
        0x6130, 0xffffffff, 0x00010000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ff0fff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xd014, 0x00010000, 0x00810001,
        0xd814, 0x00010000, 0x00810001,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000001,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1597
1598
1599 static void cik_init_golden_registers(struct radeon_device *rdev)
1600 {
1601         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1602         mutex_lock(&rdev->grbm_idx_mutex);
1603         switch (rdev->family) {
1604         case CHIP_BONAIRE:
1605                 radeon_program_register_sequence(rdev,
1606                                                  bonaire_mgcg_cgcg_init,
1607                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1608                 radeon_program_register_sequence(rdev,
1609                                                  bonaire_golden_registers,
1610                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1611                 radeon_program_register_sequence(rdev,
1612                                                  bonaire_golden_common_registers,
1613                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1614                 radeon_program_register_sequence(rdev,
1615                                                  bonaire_golden_spm_registers,
1616                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1617                 break;
1618         case CHIP_KABINI:
1619                 radeon_program_register_sequence(rdev,
1620                                                  kalindi_mgcg_cgcg_init,
1621                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1622                 radeon_program_register_sequence(rdev,
1623                                                  kalindi_golden_registers,
1624                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1625                 radeon_program_register_sequence(rdev,
1626                                                  kalindi_golden_common_registers,
1627                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1628                 radeon_program_register_sequence(rdev,
1629                                                  kalindi_golden_spm_registers,
1630                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1631                 break;
1632         case CHIP_MULLINS:
1633                 radeon_program_register_sequence(rdev,
1634                                                  kalindi_mgcg_cgcg_init,
1635                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1636                 radeon_program_register_sequence(rdev,
1637                                                  godavari_golden_registers,
1638                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  kalindi_golden_common_registers,
1641                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1642                 radeon_program_register_sequence(rdev,
1643                                                  kalindi_golden_spm_registers,
1644                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1645                 break;
1646         case CHIP_KAVERI:
1647                 radeon_program_register_sequence(rdev,
1648                                                  spectre_mgcg_cgcg_init,
1649                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1650                 radeon_program_register_sequence(rdev,
1651                                                  spectre_golden_registers,
1652                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  spectre_golden_common_registers,
1655                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1656                 radeon_program_register_sequence(rdev,
1657                                                  spectre_golden_spm_registers,
1658                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1659                 break;
1660         case CHIP_HAWAII:
1661                 radeon_program_register_sequence(rdev,
1662                                                  hawaii_mgcg_cgcg_init,
1663                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1664                 radeon_program_register_sequence(rdev,
1665                                                  hawaii_golden_registers,
1666                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  hawaii_golden_common_registers,
1669                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1670                 radeon_program_register_sequence(rdev,
1671                                                  hawaii_golden_spm_registers,
1672                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1673                 break;
1674         default:
1675                 break;
1676         }
1677         mutex_unlock(&rdev->grbm_idx_mutex);
1678 }
1679
1680 /**
1681  * cik_get_xclk - get the xclk
1682  *
1683  * @rdev: radeon_device pointer
1684  *
1685  * Returns the reference clock used by the gfx engine
1686  * (CIK).
1687  */
1688 u32 cik_get_xclk(struct radeon_device *rdev)
1689 {
1690         u32 reference_clock = rdev->clock.spll.reference_freq;
1691
1692         if (rdev->flags & RADEON_IS_IGP) {
1693                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1694                         return reference_clock / 2;
1695         } else {
1696                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1697                         return reference_clock / 4;
1698         }
1699         return reference_clock;
1700 }
1701
1702 /**
1703  * cik_mm_rdoorbell - read a doorbell dword
1704  *
1705  * @rdev: radeon_device pointer
1706  * @index: doorbell index
1707  *
1708  * Returns the value in the doorbell aperture at the
1709  * requested doorbell index (CIK).
1710  */
1711 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1712 {
1713         if (index < rdev->doorbell.num_doorbells) {
1714                 return readl(rdev->doorbell.ptr + index);
1715         } else {
1716                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1717                 return 0;
1718         }
1719 }
1720
1721 /**
1722  * cik_mm_wdoorbell - write a doorbell dword
1723  *
1724  * @rdev: radeon_device pointer
1725  * @index: doorbell index
1726  * @v: value to write
1727  *
1728  * Writes @v to the doorbell aperture at the
1729  * requested doorbell index (CIK).
1730  */
1731 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1732 {
1733         if (index < rdev->doorbell.num_doorbells) {
1734                 writel(v, rdev->doorbell.ptr + index);
1735         } else {
1736                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1737         }
1738 }
1739
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC_SEQ_IO_DEBUG {index, data} pairs programmed into the memory
 * controller before loading the legacy (headerless) MC ucode on
 * Bonaire; consumed by ci_mc_load_microcode().  New-format images
 * carry this table in their firmware header instead.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1781
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC_SEQ_IO_DEBUG {index, data} pairs programmed into the memory
 * controller before loading the legacy (headerless) MC ucode on
 * Hawaii; consumed by ci_mc_load_microcode().
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1809
1810
1811 /**
1812  * cik_srbm_select - select specific register instances
1813  *
1814  * @rdev: radeon_device pointer
1815  * @me: selected ME (micro engine)
1816  * @pipe: pipe
1817  * @queue: queue
1818  * @vmid: VMID
1819  *
1820  * Switches the currently active registers instances.  Some
1821  * registers are instanced per VMID, others are instanced per
1822  * me/pipe/queue combination.
1823  */
1824 static void cik_srbm_select(struct radeon_device *rdev,
1825                             u32 me, u32 pipe, u32 queue, u32 vmid)
1826 {
1827         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1828                              MEID(me & 0x3) |
1829                              VMID(vmid & 0xf) |
1830                              QUEUEID(queue & 0x7));
1831         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1832 }
1833
1834 /* ucode loading */
1835 /**
1836  * ci_mc_load_microcode - load MC ucode into the hw
1837  *
1838  * @rdev: radeon_device pointer
1839  *
1840  * Load the GDDR MC ucode into the hw (CIK).
1841  * Returns 0 on success, error on failure.
1842  */
1843 int ci_mc_load_microcode(struct radeon_device *rdev)
1844 {
1845         const __be32 *fw_data = NULL;
1846         const __le32 *new_fw_data = NULL;
1847         u32 running, blackout = 0, tmp;
1848         u32 *io_mc_regs = NULL;
1849         const __le32 *new_io_mc_regs = NULL;
1850         int i, regs_size, ucode_size;
1851
1852         if (!rdev->mc_fw)
1853                 return -EINVAL;
1854
1855         if (rdev->new_fw) {
1856                 const struct mc_firmware_header_v1_0 *hdr =
1857                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1858
1859                 radeon_ucode_print_mc_hdr(&hdr->header);
1860
1861                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1862                 new_io_mc_regs = (const __le32 *)
1863                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1864                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1865                 new_fw_data = (const __le32 *)
1866                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1867         } else {
1868                 ucode_size = rdev->mc_fw->size / 4;
1869
1870                 switch (rdev->family) {
1871                 case CHIP_BONAIRE:
1872                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1873                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1874                         break;
1875                 case CHIP_HAWAII:
1876                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1877                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1878                         break;
1879                 default:
1880                         return -EINVAL;
1881                 }
1882                 fw_data = (const __be32 *)rdev->mc_fw->data;
1883         }
1884
1885         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1886
1887         if (running == 0) {
1888                 if (running) {
1889                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1890                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1891                 }
1892
1893                 /* reset the engine and set to writable */
1894                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1895                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1896
1897                 /* load mc io regs */
1898                 for (i = 0; i < regs_size; i++) {
1899                         if (rdev->new_fw) {
1900                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1901                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1902                         } else {
1903                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1904                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1905                         }
1906                 }
1907
1908                 tmp = RREG32(MC_SEQ_MISC0);
1909                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1910                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1911                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1912                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1913                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1914                 }
1915
1916                 /* load the MC ucode */
1917                 for (i = 0; i < ucode_size; i++) {
1918                         if (rdev->new_fw)
1919                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1920                         else
1921                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1922                 }
1923
1924                 /* put the engine back into the active state */
1925                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1926                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1927                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1928
1929                 /* wait for training to complete */
1930                 for (i = 0; i < rdev->usec_timeout; i++) {
1931                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1932                                 break;
1933                         udelay(1);
1934                 }
1935                 for (i = 0; i < rdev->usec_timeout; i++) {
1936                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1937                                 break;
1938                         udelay(1);
1939                 }
1940
1941                 if (running)
1942                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1943         }
1944
1945         return 0;
1946 }
1947
1948 /**
1949  * cik_init_microcode - load ucode images from disk
1950  *
1951  * @rdev: radeon_device pointer
1952  *
1953  * Use the firmware interface to load the ucode images into
1954  * the driver (not loaded into hw).
1955  * Returns 0 on success, error on failure.
1956  */
1957 static int cik_init_microcode(struct radeon_device *rdev)
1958 {
1959         const char *chip_name;
1960         const char *new_chip_name;
1961         size_t pfp_req_size, me_req_size, ce_req_size,
1962                 mec_req_size, rlc_req_size, mc_req_size = 0,
1963                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1964         char fw_name[30];
1965         int new_fw = 0;
1966         int err;
1967         int num_fw;
1968
1969         DRM_DEBUG("\n");
1970
1971         switch (rdev->family) {
1972         case CHIP_BONAIRE:
1973                 chip_name = "BONAIRE";
1974                 new_chip_name = "bonaire";
1975                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1976                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1977                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1978                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1979                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1980                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1981                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1982                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1983                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1984                 num_fw = 8;
1985                 break;
1986         case CHIP_HAWAII:
1987                 chip_name = "HAWAII";
1988                 new_chip_name = "hawaii";
1989                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1991                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1995                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1998                 num_fw = 8;
1999                 break;
2000         case CHIP_KAVERI:
2001                 chip_name = "KAVERI";
2002                 new_chip_name = "kaveri";
2003                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2005                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2008                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009                 num_fw = 7;
2010                 break;
2011         case CHIP_KABINI:
2012                 chip_name = "KABINI";
2013                 new_chip_name = "kabini";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2019                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2020                 num_fw = 6;
2021                 break;
2022         case CHIP_MULLINS:
2023                 chip_name = "MULLINS";
2024                 new_chip_name = "mullins";
2025                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2026                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2027                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2028                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2029                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2030                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031                 num_fw = 6;
2032                 break;
2033         default: BUG();
2034         }
2035
2036         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2037
2038         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2039         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2040         if (err) {
2041                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2042                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2043                 if (err)
2044                         goto out;
2045                 if (rdev->pfp_fw->size != pfp_req_size) {
2046                         printk(KERN_ERR
2047                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2048                                rdev->pfp_fw->size, fw_name);
2049                         err = -EINVAL;
2050                         goto out;
2051                 }
2052         } else {
2053                 err = radeon_ucode_validate(rdev->pfp_fw);
2054                 if (err) {
2055                         printk(KERN_ERR
2056                                "cik_fw: validation failed for firmware \"%s\"\n",
2057                                fw_name);
2058                         goto out;
2059                 } else {
2060                         new_fw++;
2061                 }
2062         }
2063
2064         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2065         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2066         if (err) {
2067                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2068                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2069                 if (err)
2070                         goto out;
2071                 if (rdev->me_fw->size != me_req_size) {
2072                         printk(KERN_ERR
2073                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074                                rdev->me_fw->size, fw_name);
2075                         err = -EINVAL;
2076                 }
2077         } else {
2078                 err = radeon_ucode_validate(rdev->me_fw);
2079                 if (err) {
2080                         printk(KERN_ERR
2081                                "cik_fw: validation failed for firmware \"%s\"\n",
2082                                fw_name);
2083                         goto out;
2084                 } else {
2085                         new_fw++;
2086                 }
2087         }
2088
2089         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2090         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2091         if (err) {
2092                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2093                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2094                 if (err)
2095                         goto out;
2096                 if (rdev->ce_fw->size != ce_req_size) {
2097                         printk(KERN_ERR
2098                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->ce_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->ce_fw);
2104                 if (err) {
2105                         printk(KERN_ERR
2106                                "cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2115         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2118                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->mec_fw->size != mec_req_size) {
2122                         printk(KERN_ERR
2123                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124                                rdev->mec_fw->size, fw_name);
2125                         err = -EINVAL;
2126                 }
2127         } else {
2128                 err = radeon_ucode_validate(rdev->mec_fw);
2129                 if (err) {
2130                         printk(KERN_ERR
2131                                "cik_fw: validation failed for firmware \"%s\"\n",
2132                                fw_name);
2133                         goto out;
2134                 } else {
2135                         new_fw++;
2136                 }
2137         }
2138
2139         if (rdev->family == CHIP_KAVERI) {
2140                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2141                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2142                 if (err) {
2143                         goto out;
2144                 } else {
2145                         err = radeon_ucode_validate(rdev->mec2_fw);
2146                         if (err) {
2147                                 goto out;
2148                         } else {
2149                                 new_fw++;
2150                         }
2151                 }
2152         }
2153
2154         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2155         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2156         if (err) {
2157                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2158                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2159                 if (err)
2160                         goto out;
2161                 if (rdev->rlc_fw->size != rlc_req_size) {
2162                         printk(KERN_ERR
2163                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2164                                rdev->rlc_fw->size, fw_name);
2165                         err = -EINVAL;
2166                 }
2167         } else {
2168                 err = radeon_ucode_validate(rdev->rlc_fw);
2169                 if (err) {
2170                         printk(KERN_ERR
2171                                "cik_fw: validation failed for firmware \"%s\"\n",
2172                                fw_name);
2173                         goto out;
2174                 } else {
2175                         new_fw++;
2176                 }
2177         }
2178
2179         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2180         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2181         if (err) {
2182                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2183                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2184                 if (err)
2185                         goto out;
2186                 if (rdev->sdma_fw->size != sdma_req_size) {
2187                         printk(KERN_ERR
2188                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2189                                rdev->sdma_fw->size, fw_name);
2190                         err = -EINVAL;
2191                 }
2192         } else {
2193                 err = radeon_ucode_validate(rdev->sdma_fw);
2194                 if (err) {
2195                         printk(KERN_ERR
2196                                "cik_fw: validation failed for firmware \"%s\"\n",
2197                                fw_name);
2198                         goto out;
2199                 } else {
2200                         new_fw++;
2201                 }
2202         }
2203
2204         /* No SMC, MC ucode on APUs */
2205         if (!(rdev->flags & RADEON_IS_IGP)) {
2206                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2207                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2208                 if (err) {
2209                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2210                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2211                         if (err) {
2212                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2213                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2214                                 if (err)
2215                                         goto out;
2216                         }
2217                         if ((rdev->mc_fw->size != mc_req_size) &&
2218                             (rdev->mc_fw->size != mc2_req_size)){
2219                                 printk(KERN_ERR
2220                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2221                                        rdev->mc_fw->size, fw_name);
2222                                 err = -EINVAL;
2223                         }
2224                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2225                 } else {
2226                         err = radeon_ucode_validate(rdev->mc_fw);
2227                         if (err) {
2228                                 printk(KERN_ERR
2229                                        "cik_fw: validation failed for firmware \"%s\"\n",
2230                                        fw_name);
2231                                 goto out;
2232                         } else {
2233                                 new_fw++;
2234                         }
2235                 }
2236
2237                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2238                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2239                 if (err) {
2240                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2241                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242                         if (err) {
2243                                 printk(KERN_ERR
2244                                        "smc: error loading firmware \"%s\"\n",
2245                                        fw_name);
2246                                 release_firmware(rdev->smc_fw);
2247                                 rdev->smc_fw = NULL;
2248                                 err = 0;
2249                         } else if (rdev->smc_fw->size != smc_req_size) {
2250                                 printk(KERN_ERR
2251                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2252                                        rdev->smc_fw->size, fw_name);
2253                                 err = -EINVAL;
2254                         }
2255                 } else {
2256                         err = radeon_ucode_validate(rdev->smc_fw);
2257                         if (err) {
2258                                 printk(KERN_ERR
2259                                        "cik_fw: validation failed for firmware \"%s\"\n",
2260                                        fw_name);
2261                                 goto out;
2262                         } else {
2263                                 new_fw++;
2264                         }
2265                 }
2266         }
2267
2268         if (new_fw == 0) {
2269                 rdev->new_fw = false;
2270         } else if (new_fw < num_fw) {
2271                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2272                 err = -EINVAL;
2273         } else {
2274                 rdev->new_fw = true;
2275         }
2276
2277 out:
2278         if (err) {
2279                 if (err != -EINVAL)
2280                         printk(KERN_ERR
2281                                "cik_cp: Failed to load firmware \"%s\"\n",
2282                                fw_name);
2283                 release_firmware(rdev->pfp_fw);
2284                 rdev->pfp_fw = NULL;
2285                 release_firmware(rdev->me_fw);
2286                 rdev->me_fw = NULL;
2287                 release_firmware(rdev->ce_fw);
2288                 rdev->ce_fw = NULL;
2289                 release_firmware(rdev->mec_fw);
2290                 rdev->mec_fw = NULL;
2291                 release_firmware(rdev->mec2_fw);
2292                 rdev->mec2_fw = NULL;
2293                 release_firmware(rdev->rlc_fw);
2294                 rdev->rlc_fw = NULL;
2295                 release_firmware(rdev->sdma_fw);
2296                 rdev->sdma_fw = NULL;
2297                 release_firmware(rdev->mc_fw);
2298                 rdev->mc_fw = NULL;
2299                 release_firmware(rdev->smc_fw);
2300                 rdev->smc_fw = NULL;
2301         }
2302         return err;
2303 }
2304
2305 /*
2306  * Core functions
2307  */
2308 /**
2309  * cik_tiling_mode_table_init - init the hw tiling table
2310  *
2311  * @rdev: radeon_device pointer
2312  *
2313  * Starting with SI, the tiling setup is done globally in a
2314  * set of 32 tiling modes.  Rather than selecting each set of
2315  * parameters per surface as on older asics, we just select
2316  * which index in the tiling table we want to use, and the
2317  * surface uses those parameters (CIK).
2318  */
2319 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2320 {
2321         const u32 num_tile_mode_states = 32;
2322         const u32 num_secondary_tile_mode_states = 16;
2323         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2324         u32 num_pipe_configs;
2325         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2326                 rdev->config.cik.max_shader_engines;
2327
2328         switch (rdev->config.cik.mem_row_size_in_kb) {
2329         case 1:
2330                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2331                 break;
2332         case 2:
2333         default:
2334                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2335                 break;
2336         case 4:
2337                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2338                 break;
2339         }
2340
2341         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2342         if (num_pipe_configs > 8)
2343                 num_pipe_configs = 16;
2344
2345         if (num_pipe_configs == 16) {
2346                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2347                         switch (reg_offset) {
2348                         case 0:
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2353                                 break;
2354                         case 1:
2355                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2359                                 break;
2360                         case 2:
2361                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2365                                 break;
2366                         case 3:
2367                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2371                                 break;
2372                         case 4:
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                                  TILE_SPLIT(split_equal_to_row_size));
2377                                 break;
2378                         case 5:
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382                                 break;
2383                         case 6:
2384                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2388                                 break;
2389                         case 7:
2390                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2392                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                                  TILE_SPLIT(split_equal_to_row_size));
2394                                 break;
2395                         case 8:
2396                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2397                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2398                                 break;
2399                         case 9:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2403                                 break;
2404                         case 10:
2405                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409                                 break;
2410                         case 11:
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2414                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                                 break;
2416                         case 12:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 13:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2426                                 break;
2427                         case 14:
2428                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                                 break;
2433                         case 16:
2434                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                                 break;
2439                         case 17:
2440                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                                 break;
2445                         case 27:
2446                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449                                 break;
2450                         case 28:
2451                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                                 break;
2456                         case 29:
2457                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2458                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2460                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461                                 break;
2462                         case 30:
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467                                 break;
2468                         default:
2469                                 gb_tile_moden = 0;
2470                                 break;
2471                         }
2472                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2473                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474                 }
2475                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2476                         switch (reg_offset) {
2477                         case 0:
2478                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2482                                 break;
2483                         case 1:
2484                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2488                                 break;
2489                         case 2:
2490                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2494                                 break;
2495                         case 3:
2496                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2500                                 break;
2501                         case 4:
2502                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2506                                 break;
2507                         case 5:
2508                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2512                                 break;
2513                         case 6:
2514                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2518                                 break;
2519                         case 8:
2520                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2524                                 break;
2525                         case 9:
2526                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2530                                 break;
2531                         case 10:
2532                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2536                                 break;
2537                         case 11:
2538                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2542                                 break;
2543                         case 12:
2544                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2548                                 break;
2549                         case 13:
2550                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2554                                 break;
2555                         case 14:
2556                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2560                                 break;
2561                         default:
2562                                 gb_tile_moden = 0;
2563                                 break;
2564                         }
2565                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2566                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2567                 }
2568         } else if (num_pipe_configs == 8) {
2569                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2570                         switch (reg_offset) {
2571                         case 0:
2572                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576                                 break;
2577                         case 1:
2578                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2582                                 break;
2583                         case 2:
2584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588                                 break;
2589                         case 3:
2590                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2594                                 break;
2595                         case 4:
2596                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2598                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                                  TILE_SPLIT(split_equal_to_row_size));
2600                                 break;
2601                         case 5:
2602                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605                                 break;
2606                         case 6:
2607                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2611                                 break;
2612                         case 7:
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                                  TILE_SPLIT(split_equal_to_row_size));
2617                                 break;
2618                         case 8:
2619                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2620                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2621                                 break;
2622                         case 9:
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2626                                 break;
2627                         case 10:
2628                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632                                 break;
2633                         case 11:
2634                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2638                                 break;
2639                         case 12:
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                 break;
2645                         case 13:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649                                 break;
2650                         case 14:
2651                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655                                 break;
2656                         case 16:
2657                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                                 break;
2662                         case 17:
2663                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667                                 break;
2668                         case 27:
2669                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2672                                 break;
2673                         case 28:
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                 break;
2679                         case 29:
2680                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2682                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684                                 break;
2685                         case 30:
2686                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2687                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690                                 break;
2691                         default:
2692                                 gb_tile_moden = 0;
2693                                 break;
2694                         }
2695                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2696                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2697                 }
2698                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699                         switch (reg_offset) {
2700                         case 0:
2701                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2705                                 break;
2706                         case 1:
2707                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2711                                 break;
2712                         case 2:
2713                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2717                                 break;
2718                         case 3:
2719                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2723                                 break;
2724                         case 4:
2725                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2729                                 break;
2730                         case 5:
2731                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2735                                 break;
2736                         case 6:
2737                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2741                                 break;
2742                         case 8:
2743                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2747                                 break;
2748                         case 9:
2749                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2753                                 break;
2754                         case 10:
2755                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2759                                 break;
2760                         case 11:
2761                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2765                                 break;
2766                         case 12:
2767                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2771                                 break;
2772                         case 13:
2773                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2777                                 break;
2778                         case 14:
2779                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2783                                 break;
2784                         default:
2785                                 gb_tile_moden = 0;
2786                                 break;
2787                         }
2788                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790                 }
2791         } else if (num_pipe_configs == 4) {
2792                 if (num_rbs == 4) {
2793                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2794                                 switch (reg_offset) {
2795                                 case 0:
2796                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800                                         break;
2801                                 case 1:
2802                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2806                                         break;
2807                                 case 2:
2808                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812                                         break;
2813                                 case 3:
2814                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2818                                         break;
2819                                 case 4:
2820                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2822                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                                          TILE_SPLIT(split_equal_to_row_size));
2824                                         break;
2825                                 case 5:
2826                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829                                         break;
2830                                 case 6:
2831                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2835                                         break;
2836                                 case 7:
2837                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2838                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2839                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840                                                          TILE_SPLIT(split_equal_to_row_size));
2841                                         break;
2842                                 case 8:
2843                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2844                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2845                                         break;
2846                                 case 9:
2847                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2850                                         break;
2851                                 case 10:
2852                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856                                         break;
2857                                 case 11:
2858                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2860                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2861                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862                                         break;
2863                                 case 12:
2864                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868                                         break;
2869                                 case 13:
2870                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2871                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2873                                         break;
2874                                 case 14:
2875                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879                                         break;
2880                                 case 16:
2881                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2882                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                         break;
2886                                 case 17:
2887                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2888                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                         break;
2892                                 case 27:
2893                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2896                                         break;
2897                                 case 28:
2898                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902                                         break;
2903                                 case 29:
2904                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908                                         break;
2909                                 case 30:
2910                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914                                         break;
2915                                 default:
2916                                         gb_tile_moden = 0;
2917                                         break;
2918                                 }
2919                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2920                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2921                         }
2922                 } else if (num_rbs < 4) {
2923                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2924                                 switch (reg_offset) {
2925                                 case 0:
2926                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2930                                         break;
2931                                 case 1:
2932                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2936                                         break;
2937                                 case 2:
2938                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2940                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2942                                         break;
2943                                 case 3:
2944                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2946                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2947                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2948                                         break;
2949                                 case 4:
2950                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2952                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2953                                                          TILE_SPLIT(split_equal_to_row_size));
2954                                         break;
2955                                 case 5:
2956                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959                                         break;
2960                                 case 6:
2961                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2963                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2965                                         break;
2966                                 case 7:
2967                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2969                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2970                                                          TILE_SPLIT(split_equal_to_row_size));
2971                                         break;
2972                                 case 8:
2973                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2974                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2975                                         break;
2976                                 case 9:
2977                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2980                                         break;
2981                                 case 10:
2982                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2984                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986                                         break;
2987                                 case 11:
2988                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2989                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2990                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992                                         break;
2993                                 case 12:
2994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998                                         break;
2999                                 case 13:
3000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3003                                         break;
3004                                 case 14:
3005                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009                                         break;
3010                                 case 16:
3011                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3013                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3014                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015                                         break;
3016                                 case 17:
3017                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3018                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3020                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                                         break;
3022                                 case 27:
3023                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3026                                         break;
3027                                 case 28:
3028                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032                                         break;
3033                                 case 29:
3034                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3035                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3036                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3037                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038                                         break;
3039                                 case 30:
3040                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3041                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3043                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3044                                         break;
3045                                 default:
3046                                         gb_tile_moden = 0;
3047                                         break;
3048                                 }
3049                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3050                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3051                         }
3052                 }
3053                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3054                         switch (reg_offset) {
3055                         case 0:
3056                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3060                                 break;
3061                         case 1:
3062                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3066                                 break;
3067                         case 2:
3068                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3072                                 break;
3073                         case 3:
3074                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3078                                 break;
3079                         case 4:
3080                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3084                                 break;
3085                         case 5:
3086                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3090                                 break;
3091                         case 6:
3092                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3096                                 break;
3097                         case 8:
3098                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3099                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3100                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3102                                 break;
3103                         case 9:
3104                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3105                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3106                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3108                                 break;
3109                         case 10:
3110                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3112                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3113                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3114                                 break;
3115                         case 11:
3116                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3118                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3120                                 break;
3121                         case 12:
3122                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3126                                 break;
3127                         case 13:
3128                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3132                                 break;
3133                         case 14:
3134                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3138                                 break;
3139                         default:
3140                                 gb_tile_moden = 0;
3141                                 break;
3142                         }
3143                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3144                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3145                 }
3146         } else if (num_pipe_configs == 2) {
3147                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3148                         switch (reg_offset) {
3149                         case 0:
3150                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3154                                 break;
3155                         case 1:
3156                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3157                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3160                                 break;
3161                         case 2:
3162                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3164                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3166                                 break;
3167                         case 3:
3168                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3170                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3172                                 break;
3173                         case 4:
3174                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3176                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                                  TILE_SPLIT(split_equal_to_row_size));
3178                                 break;
3179                         case 5:
3180                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183                                 break;
3184                         case 6:
3185                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3187                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3188                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3189                                 break;
3190                         case 7:
3191                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3192                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3193                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                                  TILE_SPLIT(split_equal_to_row_size));
3195                                 break;
3196                         case 8:
3197                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198                                                 PIPE_CONFIG(ADDR_SURF_P2);
3199                                 break;
3200                         case 9:
3201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203                                                  PIPE_CONFIG(ADDR_SURF_P2));
3204                                 break;
3205                         case 10:
3206                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3208                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210                                 break;
3211                         case 11:
3212                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3214                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3215                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216                                 break;
3217                         case 12:
3218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3220                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                                 break;
3223                         case 13:
3224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3227                                 break;
3228                         case 14:
3229                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3232                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233                                 break;
3234                         case 16:
3235                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3238                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239                                 break;
3240                         case 17:
3241                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3242                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3244                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245                                 break;
3246                         case 27:
3247                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3249                                                  PIPE_CONFIG(ADDR_SURF_P2));
3250                                 break;
3251                         case 28:
3252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256                                 break;
3257                         case 29:
3258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262                                 break;
3263                         case 30:
3264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3265                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3267                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268                                 break;
3269                         default:
3270                                 gb_tile_moden = 0;
3271                                 break;
3272                         }
3273                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3274                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3275                 }
3276                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3277                         switch (reg_offset) {
3278                         case 0:
3279                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3283                                 break;
3284                         case 1:
3285                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3289                                 break;
3290                         case 2:
3291                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3295                                 break;
3296                         case 3:
3297                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3301                                 break;
3302                         case 4:
3303                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3307                                 break;
3308                         case 5:
3309                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3313                                 break;
3314                         case 6:
3315                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3317                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3318                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3319                                 break;
3320                         case 8:
3321                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3323                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3325                                 break;
3326                         case 9:
3327                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3328                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3331                                 break;
3332                         case 10:
3333                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3334                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3335                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3337                                 break;
3338                         case 11:
3339                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3340                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3341                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3343                                 break;
3344                         case 12:
3345                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3347                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3349                                 break;
3350                         case 13:
3351                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3355                                 break;
3356                         case 14:
3357                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3361                                 break;
3362                         default:
3363                                 gb_tile_moden = 0;
3364                                 break;
3365                         }
3366                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3367                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3368                 }
3369         } else
3370                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3371 }
3372
3373 /**
3374  * cik_select_se_sh - select which SE, SH to address
3375  *
3376  * @rdev: radeon_device pointer
3377  * @se_num: shader engine to address
3378  * @sh_num: sh block to address
3379  *
3380  * Select which SE, SH combinations to address. Certain
3381  * registers are instanced per SE or SH.  0xffffffff means
3382  * broadcast to all SEs or SHs (CIK).
3383  */
3384 static void cik_select_se_sh(struct radeon_device *rdev,
3385                              u32 se_num, u32 sh_num)
3386 {
3387         u32 data = INSTANCE_BROADCAST_WRITES;
3388
3389         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3390                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3391         else if (se_num == 0xffffffff)
3392                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3393         else if (sh_num == 0xffffffff)
3394                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3395         else
3396                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3397         WREG32(GRBM_GFX_INDEX, data);
3398 }
3399
3400 /**
3401  * cik_create_bitmask - create a bitmask
3402  *
3403  * @bit_width: length of the mask
3404  *
3405  * create a variable length bit mask (CIK).
3406  * Returns the bitmask.
3407  */
3408 static u32 cik_create_bitmask(u32 bit_width)
3409 {
3410         u32 i, mask = 0;
3411
3412         for (i = 0; i < bit_width; i++) {
3413                 mask <<= 1;
3414                 mask |= 1;
3415         }
3416         return mask;
3417 }
3418
3419 /**
3420  * cik_get_rb_disabled - computes the mask of disabled RBs
3421  *
3422  * @rdev: radeon_device pointer
3423  * @max_rb_num: max RBs (render backends) for the asic
3424  * @se_num: number of SEs (shader engines) for the asic
3425  * @sh_per_se: number of SH blocks per SE for the asic
3426  *
3427  * Calculates the bitmask of disabled RBs (CIK).
3428  * Returns the disabled RB bitmask.
3429  */
3430 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3431                               u32 max_rb_num_per_se,
3432                               u32 sh_per_se)
3433 {
3434         u32 data, mask;
3435
3436         data = RREG32(CC_RB_BACKEND_DISABLE);
3437         if (data & 1)
3438                 data &= BACKEND_DISABLE_MASK;
3439         else
3440                 data = 0;
3441         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3442
3443         data >>= BACKEND_DISABLE_SHIFT;
3444
3445         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3446
3447         return data & mask;
3448 }
3449
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Collects the disabled-RB state for every SE/SH pair, derives the
 * enabled-RB mask (cached in rdev->config.cik.backend_enable_mask),
 * and programs PA_SC_RASTER_CONFIG per SE accordingly (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: walk each SE/SH pair and pack its per-SH disabled-RB
	 * bitmap into one dword.  Hawaii uses a wider per-SH field than
	 * the other CIK parts. */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast before releasing the GRBM index lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert: a set bit in enabled_rbs now means the RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Pass 2: program the raster config for each SE based on which
	 * of its RBs are enabled (2 bits consumed per SH iteration). */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RB enabled in this SH: map packers away */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3522
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader/pipe topology limits, SC FIFO sizes, and the
	 * "golden" GB_ADDR_CONFIG value to program. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs differ in CU count / RB count; distinguish
		 * them by PCI device id. */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not consumed in this function */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of columns,
	 * clamped to 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active CUs across all SE/SH pairs */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write the DB/CB debug controls, preserving
	 * unrelated fields */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* program the SC FIFO sizes chosen per-family above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* write back unchanged to flush/latch the host path config */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the programming settle before anyone uses the engine */
	udelay(50);
}
3805
/*
 * GPU scratch register helper functions.
 */
3809 /**
3810  * cik_scratch_init - setup driver info for CP scratch regs
3811  *
3812  * @rdev: radeon_device pointer
3813  *
3814  * Set up the number and offset of the CP scratch registers.
3815  * NOTE: use of CP scratch registers is a legacy inferface and
3816  * is not used by default on newer asics (r6xx+).  On newer asics,
3817  * memory buffers are used for fences rather than scratch regs.
3818  */
3819 static void cik_scratch_init(struct radeon_device *rdev)
3820 {
3821         int i;
3822
3823         rdev->scratch.num_reg = 7;
3824         rdev->scratch.reg_base = SCRATCH_REG0;
3825         for (i = 0; i < rdev->scratch.num_reg; i++) {
3826                 rdev->scratch.free[i] = true;
3827                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3828         }
3829 }
3830
3831 /**
3832  * cik_ring_test - basic gfx ring test
3833  *
3834  * @rdev: radeon_device pointer
3835  * @ring: radeon_ring structure holding ring information
3836  *
3837  * Allocate a scratch register and write to it using the gfx ring (CIK).
3838  * Provides a basic gfx ring test to verify that the ring is working.
3839  * Used by cik_cp_gfx_resume();
3840  * Returns 0 on success, error on failure.
3841  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a free CP scratch register to use as the test target. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed it with a sentinel the ring write must overwrite. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Emit a SET_UCONFIG_REG packet writing 0xDEADBEEF to the scratch reg. */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll (bounded by usec_timeout) until the CP executes the packet. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3882
3883 /**
3884  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3885  *
3886  * @rdev: radeon_device pointer
3887  * @ridx: radeon ring index
3888  *
3889  * Emits an hdp flush on the cp.
3890  */
3891 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3892                                        int ridx)
3893 {
3894         struct radeon_ring *ring = &rdev->ring[ridx];
3895         u32 ref_and_mask;
3896
3897         switch (ring->idx) {
3898         case CAYMAN_RING_TYPE_CP1_INDEX:
3899         case CAYMAN_RING_TYPE_CP2_INDEX:
3900         default:
3901                 switch (ring->me) {
3902                 case 0:
3903                         ref_and_mask = CP2 << ring->pipe;
3904                         break;
3905                 case 1:
3906                         ref_and_mask = CP6 << ring->pipe;
3907                         break;
3908                 default:
3909                         return;
3910                 }
3911                 break;
3912         case RADEON_RING_TYPE_GFX_INDEX:
3913                 ref_and_mask = CP0;
3914                 break;
3915         }
3916
3917         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3918         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3919                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3920                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3921         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3922         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3923         radeon_ring_write(ring, ref_and_mask);
3924         radeon_ring_write(ring, ref_and_mask);
3925         radeon_ring_write(ring, 0x20); /* poll interval */
3926 }
3927
3928 /**
3929  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3930  *
3931  * @rdev: radeon_device pointer
3932  * @fence: radeon fence object
3933  *
 * Emits a fence sequence number on the gfx ring and flushes
3935  * GPU caches.
3936  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1) = write 32-bit seq value, INT_SEL(0) = no interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2) = interrupt once the seq value has been written */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3968
3969 /**
3970  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3971  *
3972  * @rdev: radeon_device pointer
3973  * @fence: radeon fence object
3974  *
 * Emits a fence sequence number on the compute ring and flushes
3976  * GPU caches.
3977  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = interrupt on write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3996
3997 /**
3998  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3999  *
4000  * @rdev: radeon_device pointer
4001  * @ring: radeon ring buffer object
4002  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
4004  *
4005  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4006  * from running ahead of semaphore waits.
4007  */
4008 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4009                              struct radeon_ring *ring,
4010                              struct radeon_semaphore *semaphore,
4011                              bool emit_wait)
4012 {
4013         uint64_t addr = semaphore->gpu_addr;
4014         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4015
4016         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4017         radeon_ring_write(ring, lower_32_bits(addr));
4018         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4019
4020         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4021                 /* Prevent the PFP from running ahead of the semaphore wait */
4022                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4023                 radeon_ring_write(ring, 0x0);
4024         }
4025
4026         return true;
4027 }
4028
4029 /**
4030  * cik_copy_cpdma - copy pages using the CP DMA engine
4031  *
4032  * @rdev: radeon_device pointer
4033  * @src_offset: src GPU address
4034  * @dst_offset: dst GPU address
4035  * @num_gpu_pages: number of GPU pages to xfer
4036  * @resv: reservation object to sync to
4037  *
4038  * Copy GPU paging using the CP DMA engine (CIK+).
4039  * Used by the radeon ttm implementation to move pages if
4040  * registered as the asic copy callback.
4041  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* each DMA_DATA packet moves at most 0x1fffff bytes, so chunk the copy */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait on the fences in resv before starting the copy */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final chunk asks the CP to wait for completion */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	/* fence the copy so callers can wait on it */
	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4100
4101 /*
4102  * IB stuff
4103  */
4104 /**
4105  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ib: radeon indirect buffer object
4109  *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
4114  * on the gfx ring for execution by the GPU.
4115  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record the post-IB rptr in a scratch register:
			 * 3 dwords for this packet + 4 for the IB packet below
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* otherwise record it via the write-back buffer:
			 * 5 dwords for this packet + 4 for the IB packet below
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VMID it executes under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4159
4160 /**
4161  * cik_ib_test - basic gfx ring IB test
4162  *
4163  * @rdev: radeon_device pointer
4164  * @ring: radeon_ring structure holding ring information
4165  *
4166  * Allocate an IB and execute it on the gfx ring (CIK).
4167  * Provides a basic gfx ring test to verify that IBs are working.
4168  * Returns 0 on success, error on failure.
4169  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a scratch register and seed it with a sentinel value. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* The 3-dword IB writes 0xDEADBEEF to the scratch register. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* Wait for the IB's fence, then poll for the value to land. */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4225
4226 /*
4227  * CP.
 * On CIK, gfx and compute now have independent command processors.
4229  *
4230  * GFX
4231  * Gfx consists of a single ring and can process both gfx jobs and
4232  * compute jobs.  The gfx CP consists of three microengines (ME):
4233  * PFP - Pre-Fetch Parser
4234  * ME - Micro Engine
4235  * CE - Constant Engine
4236  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4238  * used by the DE so that they can be loaded into cache in parallel
4239  * while the DE is processing state update packets.
4240  *
4241  * Compute
4242  * The compute CP consists of two microengines (ME):
4243  * MEC1 - Compute MicroEngine 1
4244  * MEC2 - Compute MicroEngine 2
4245  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4246  * The queues are exposed to userspace and are programmed directly
4247  * by the compute runtime.
4248  */
4249 /**
4250  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the gfx MEs.
4256  */
4257 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4258 {
4259         if (enable)
4260                 WREG32(CP_ME_CNTL, 0);
4261         else {
4262                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4263                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4264                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4265                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4266         }
4267         udelay(50);
4268 }
4269
4270 /**
4271  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4272  *
4273  * @rdev: radeon_device pointer
4274  *
4275  * Loads the gfx PFP, ME, and CE ucode.
4276  * Returns 0 for success, -EINVAL if the ucode is not available.
4277  */
4278 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4279 {
4280         int i;
4281
4282         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4283                 return -EINVAL;
4284
4285         cik_cp_gfx_enable(rdev, false);
4286
4287         if (rdev->new_fw) {
4288                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4289                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4290                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4291                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4292                 const struct gfx_firmware_header_v1_0 *me_hdr =
4293                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4294                 const __le32 *fw_data;
4295                 u32 fw_size;
4296
4297                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4298                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4299                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4300
4301                 /* PFP */
4302                 fw_data = (const __le32 *)
4303                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4304                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4305                 WREG32(CP_PFP_UCODE_ADDR, 0);
4306                 for (i = 0; i < fw_size; i++)
4307                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4308                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4309
4310                 /* CE */
4311                 fw_data = (const __le32 *)
4312                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4313                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4314                 WREG32(CP_CE_UCODE_ADDR, 0);
4315                 for (i = 0; i < fw_size; i++)
4316                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4317                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4318
4319                 /* ME */
4320                 fw_data = (const __be32 *)
4321                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4322                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4323                 WREG32(CP_ME_RAM_WADDR, 0);
4324                 for (i = 0; i < fw_size; i++)
4325                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4326                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4327                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4328         } else {
4329                 const __be32 *fw_data;
4330
4331                 /* PFP */
4332                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4333                 WREG32(CP_PFP_UCODE_ADDR, 0);
4334                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4335                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4336                 WREG32(CP_PFP_UCODE_ADDR, 0);
4337
4338                 /* CE */
4339                 fw_data = (const __be32 *)rdev->ce_fw->data;
4340                 WREG32(CP_CE_UCODE_ADDR, 0);
4341                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4342                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4343                 WREG32(CP_CE_UCODE_ADDR, 0);
4344
4345                 /* ME */
4346                 fw_data = (const __be32 *)rdev->me_fw->data;
4347                 WREG32(CP_ME_RAM_WADDR, 0);
4348                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4349                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4350                 WREG32(CP_ME_RAM_WADDR, 0);
4351         }
4352
4353         return 0;
4354 }
4355
4356 /**
4357  * cik_cp_gfx_start - start the gfx ring
4358  *
4359  * @rdev: radeon_device pointer
4360  *
4361  * Enables the ring and loads the clear state context and other
4362  * packets required to init the ring.
4363  * Returns 0 for success, error for failure.
4364  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the clear state dwords plus the fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the CIK default register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4416
4417 /**
4418  * cik_cp_gfx_fini - stop the gfx ring
4419  *
4420  * @rdev: radeon_device pointer
4421  *
4422  * Stop the gfx ring and tear down the driver ring
4423  * info.
4424  */
4425 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4426 {
4427         cik_cp_gfx_enable(rdev, false);
4428         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4429 }
4430
4431 /**
4432  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4433  *
4434  * @rdev: radeon_device pointer
4435  *
4436  * Program the location and size of the gfx ring buffer
4437  * and test it to make sure it's working.
4438  * Returns 0 for success, error for failure.
4439  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	/* this register is not present on Hawaii */
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base address (256-byte units) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is up: expose full VRAM to TTM again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4506
4507 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4508                      struct radeon_ring *ring)
4509 {
4510         u32 rptr;
4511
4512         if (rdev->wb.enabled)
4513                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4514         else
4515                 rptr = RREG32(CP_RB0_RPTR);
4516
4517         return rptr;
4518 }
4519
4520 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4521                      struct radeon_ring *ring)
4522 {
4523         u32 wptr;
4524
4525         wptr = RREG32(CP_RB0_WPTR);
4526
4527         return wptr;
4528 }
4529
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted MMIO write */
	(void)RREG32(CP_RB0_WPTR);
}
4536
4537 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4538                          struct radeon_ring *ring)
4539 {
4540         u32 rptr;
4541
4542         if (rdev->wb.enabled) {
4543                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4544         } else {
4545                 mutex_lock(&rdev->srbm_mutex);
4546                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4547                 rptr = RREG32(CP_HQD_PQ_RPTR);
4548                 cik_srbm_select(rdev, 0, 0, 0, 0);
4549                 mutex_unlock(&rdev->srbm_mutex);
4550         }
4551
4552         return rptr;
4553 }
4554
4555 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4556                          struct radeon_ring *ring)
4557 {
4558         u32 wptr;
4559
4560         if (rdev->wb.enabled) {
4561                 /* XXX check if swapping is necessary on BE */
4562                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4563         } else {
4564                 mutex_lock(&rdev->srbm_mutex);
4565                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4566                 wptr = RREG32(CP_HQD_PQ_WPTR);
4567                 cik_srbm_select(rdev, 0, 0, 0, 0);
4568                 mutex_unlock(&rdev->srbm_mutex);
4569         }
4570
4571         return wptr;
4572 }
4573
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	/* ring the queue's doorbell to notify the MEC of the new wptr */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4581
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* NOTE(review): the visible caller (cik_cp_compute_enable) holds
	 * rdev->srbm_mutex across this; the SRBM selection below is not
	 * otherwise serialized here.
	 */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* bounded wait for the hardware queue to go inactive */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4606
4607 /**
4608  * cik_cp_compute_enable - enable/disable the compute CP MEs
4609  *
4610  * @rdev: radeon_device pointer
4611  * @enable: enable or disable the MEs
4612  *
4613  * Halts or unhalts the compute MEs.
4614  */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		/* clearing CP_MEC_CNTL releases both MECs from halt */
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both MEC microengines and mark their rings unusable */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* brief settle delay after changing MEC halt state */
	udelay(50);
}
4635
4636 /**
4637  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4638  *
4639  * @rdev: radeon_device pointer
4640  *
4641  * Loads the compute MEC1&2 ucode.
4642  * Returns 0 for success, -EINVAL if the ucode is not available.
4643  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching the ucode registers */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware images carry a header describing the payload */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		/* reset the write index, stream the words, then record the version */
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* KV has a second MEC with its own firmware image */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy headerless images: big-endian words, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* legacy path loads the same mec image into both MECs */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4706
4707 /**
4708  * cik_cp_compute_start - start the compute queues
4709  *
4710  * @rdev: radeon_device pointer
4711  *
4712  * Enable the compute queues.
4713  * Returns 0 for success, error for failure.
4714  */
4715 static int cik_cp_compute_start(struct radeon_device *rdev)
4716 {
4717         cik_cp_compute_enable(rdev, true);
4718
4719         return 0;
4720 }
4721
4722 /**
4723  * cik_cp_compute_fini - stop the compute queues
4724  *
4725  * @rdev: radeon_device pointer
4726  *
4727  * Stop the compute queues and tear down the driver queue
4728  * info.
4729  */
4730 static void cik_cp_compute_fini(struct radeon_device *rdev)
4731 {
4732         int i, idx, r;
4733
4734         cik_cp_compute_enable(rdev, false);
4735
4736         for (i = 0; i < 2; i++) {
4737                 if (i == 0)
4738                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4739                 else
4740                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4741
4742                 if (rdev->ring[idx].mqd_obj) {
4743                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4744                         if (unlikely(r != 0))
4745                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4746
4747                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4748                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4751                         rdev->ring[idx].mqd_obj = NULL;
4752                 }
4753         }
4754 }
4755
4756 static void cik_mec_fini(struct radeon_device *rdev)
4757 {
4758         int r;
4759
4760         if (rdev->mec.hpd_eop_obj) {
4761                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4762                 if (unlikely(r != 0))
4763                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4764                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4765                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4766
4767                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4768                 rdev->mec.hpd_eop_obj = NULL;
4769         }
4770 }
4771
4772 #define MEC_HPD_SIZE 2048
4773
4774 static int cik_mec_init(struct radeon_device *rdev)
4775 {
4776         int r;
4777         u32 *hpd;
4778
4779         /*
4780          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4781          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4782          * Nonetheless, we assign only 1 pipe because all other pipes will
4783          * be handled by KFD
4784          */
4785         rdev->mec.num_mec = 1;
4786         rdev->mec.num_pipe = 1;
4787         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4788
4789         if (rdev->mec.hpd_eop_obj == NULL) {
4790                 r = radeon_bo_create(rdev,
4791                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4792                                      PAGE_SIZE, true,
4793                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4794                                      &rdev->mec.hpd_eop_obj);
4795                 if (r) {
4796                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4797                         return r;
4798                 }
4799         }
4800
4801         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4802         if (unlikely(r != 0)) {
4803                 cik_mec_fini(rdev);
4804                 return r;
4805         }
4806         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4807                           &rdev->mec.hpd_eop_gpu_addr);
4808         if (r) {
4809                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4810                 cik_mec_fini(rdev);
4811                 return r;
4812         }
4813         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4814         if (r) {
4815                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4816                 cik_mec_fini(rdev);
4817                 return r;
4818         }
4819
4820         /* clear memory.  Not sure if this is required or not */
4821         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4822
4823         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4824         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4825
4826         return 0;
4827 }
4828
/*
 * CPU-side copy of the per-queue CP_MQD_x and CP_HQD_x registers.
 * cik_cp_compute_resume() fills this (embedded in struct bonaire_mqd)
 * with the exact values it writes to the corresponding registers, so
 * the queue's programmed state is preserved in memory.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4867
/*
 * Memory Queue Descriptor (MQD) for CIK/Bonaire compute queues.
 * One MQD is allocated in GTT per compute ring by
 * cik_cp_compute_resume(); queue_state carries the saved HQD register
 * values. Layout is presumably what the CP/MEC microcode expects —
 * confirm against the firmware documentation before changing.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4895
4896 /**
4897  * cik_cp_compute_resume - setup the compute queue registers
4898  *
4899  * @rdev: radeon_device pointer
4900  *
4901  * Program the compute queues and test them to make sure they
4902  * are working.
4903  * Returns 0 for success, error for failure.
4904  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* un-halt the MECs before programming the queues */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	/* default SRBM routing while programming the shared EOP setup */
	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD bo for this queue */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		/* magic MQD header value — presumably a type/version tag the
		 * CP microcode checks; confirm against firmware docs */
		mqd->header = 0xC0310800;
		/* enable all threads in all CU thread groups */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* route register accesses to this queue's me/pipe/queue */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* bounded wait for the HQD to deactivate */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size is log2 encoded, like EOP_SIZE above */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		/* restore default SRBM routing before dropping the lock */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* queue must be marked ready before the ring test submits to it */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5136
5137 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5138 {
5139         cik_cp_gfx_enable(rdev, enable);
5140         cik_cp_compute_enable(rdev, enable);
5141 }
5142
/* Load gfx then compute CP microcode; stop at the first failure. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5156
/* Tear down both command processors: gfx first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5162
5163 static int cik_cp_resume(struct radeon_device *rdev)
5164 {
5165         int r;
5166
5167         cik_enable_gui_idle_interrupt(rdev, false);
5168
5169         r = cik_cp_load_microcode(rdev);
5170         if (r)
5171                 return r;
5172
5173         r = cik_cp_gfx_resume(rdev);
5174         if (r)
5175                 return r;
5176         r = cik_cp_compute_resume(rdev);
5177         if (r)
5178                 return r;
5179
5180         cik_enable_gui_idle_interrupt(rdev, true);
5181
5182         return 0;
5183 }
5184
/*
 * Dump the main GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Called from cik_gpu_soft_reset() to aid hang diagnosis.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5224
5225 /**
5226  * cik_gpu_check_soft_reset - check which blocks are busy
5227  *
5228  * @rdev: radeon_device pointer
5229  *
5230  * Check which blocks are busy and return the relevant reset
5231  * mask to be used by cik_gpu_soft_reset().
5232  * Returns a mask of the blocks to be reset.
5233  */
5234 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5235 {
5236         u32 reset_mask = 0;
5237         u32 tmp;
5238
5239         /* GRBM_STATUS */
5240         tmp = RREG32(GRBM_STATUS);
5241         if (tmp & (PA_BUSY | SC_BUSY |
5242                    BCI_BUSY | SX_BUSY |
5243                    TA_BUSY | VGT_BUSY |
5244                    DB_BUSY | CB_BUSY |
5245                    GDS_BUSY | SPI_BUSY |
5246                    IA_BUSY | IA_BUSY_NO_DMA))
5247                 reset_mask |= RADEON_RESET_GFX;
5248
5249         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5250                 reset_mask |= RADEON_RESET_CP;
5251
5252         /* GRBM_STATUS2 */
5253         tmp = RREG32(GRBM_STATUS2);
5254         if (tmp & RLC_BUSY)
5255                 reset_mask |= RADEON_RESET_RLC;
5256
5257         /* SDMA0_STATUS_REG */
5258         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5259         if (!(tmp & SDMA_IDLE))
5260                 reset_mask |= RADEON_RESET_DMA;
5261
5262         /* SDMA1_STATUS_REG */
5263         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5264         if (!(tmp & SDMA_IDLE))
5265                 reset_mask |= RADEON_RESET_DMA1;
5266
5267         /* SRBM_STATUS2 */
5268         tmp = RREG32(SRBM_STATUS2);
5269         if (tmp & SDMA_BUSY)
5270                 reset_mask |= RADEON_RESET_DMA;
5271
5272         if (tmp & SDMA1_BUSY)
5273                 reset_mask |= RADEON_RESET_DMA1;
5274
5275         /* SRBM_STATUS */
5276         tmp = RREG32(SRBM_STATUS);
5277
5278         if (tmp & IH_BUSY)
5279                 reset_mask |= RADEON_RESET_IH;
5280
5281         if (tmp & SEM_BUSY)
5282                 reset_mask |= RADEON_RESET_SEM;
5283
5284         if (tmp & GRBM_RQ_PENDING)
5285                 reset_mask |= RADEON_RESET_GRBM;
5286
5287         if (tmp & VMC_BUSY)
5288                 reset_mask |= RADEON_RESET_VMC;
5289
5290         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5291                    MCC_BUSY | MCD_BUSY))
5292                 reset_mask |= RADEON_RESET_MC;
5293
5294         if (evergreen_is_display_hung(rdev))
5295                 reset_mask |= RADEON_RESET_DISPLAY;
5296
5297         /* Skip MC reset as it's mostly likely not hung, just busy */
5298         if (reset_mask & RADEON_RESET_MC) {
5299                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5300                 reset_mask &= ~RADEON_RESET_MC;
5301         }
5302
5303         return reset_mask;
5304 }
5305
5306 /**
5307  * cik_gpu_soft_reset - soft reset GPU
5308  *
5309  * @rdev: radeon_device pointer
5310  * @reset_mask: mask of which blocks to reset
5311  *
5312  * Soft reset the blocks specified in @reset_mask.
5313  */
5314 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5315 {
5316         struct evergreen_mc_save save;
5317         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5318         u32 tmp;
5319
5320         if (reset_mask == 0)
5321                 return;
5322
5323         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5324
5325         cik_print_gpu_status_regs(rdev);
5326         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5327                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5328         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5329                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5330
5331         /* disable CG/PG */
5332         cik_fini_pg(rdev);
5333         cik_fini_cg(rdev);
5334
5335         /* stop the rlc */
5336         cik_rlc_stop(rdev);
5337
5338         /* Disable GFX parsing/prefetching */
5339         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5340
5341         /* Disable MEC parsing/prefetching */
5342         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5343
5344         if (reset_mask & RADEON_RESET_DMA) {
5345                 /* sdma0 */
5346                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5347                 tmp |= SDMA_HALT;
5348                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5349         }
5350         if (reset_mask & RADEON_RESET_DMA1) {
5351                 /* sdma1 */
5352                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5353                 tmp |= SDMA_HALT;
5354                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5355         }
5356
5357         evergreen_mc_stop(rdev, &save);
5358         if (evergreen_mc_wait_for_idle(rdev)) {
5359                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5360         }
5361
5362         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5363                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5364
5365         if (reset_mask & RADEON_RESET_CP) {
5366                 grbm_soft_reset |= SOFT_RESET_CP;
5367
5368                 srbm_soft_reset |= SOFT_RESET_GRBM;
5369         }
5370
5371         if (reset_mask & RADEON_RESET_DMA)
5372                 srbm_soft_reset |= SOFT_RESET_SDMA;
5373
5374         if (reset_mask & RADEON_RESET_DMA1)
5375                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5376
5377         if (reset_mask & RADEON_RESET_DISPLAY)
5378                 srbm_soft_reset |= SOFT_RESET_DC;
5379
5380         if (reset_mask & RADEON_RESET_RLC)
5381                 grbm_soft_reset |= SOFT_RESET_RLC;
5382
5383         if (reset_mask & RADEON_RESET_SEM)
5384                 srbm_soft_reset |= SOFT_RESET_SEM;
5385
5386         if (reset_mask & RADEON_RESET_IH)
5387                 srbm_soft_reset |= SOFT_RESET_IH;
5388
5389         if (reset_mask & RADEON_RESET_GRBM)
5390                 srbm_soft_reset |= SOFT_RESET_GRBM;
5391
5392         if (reset_mask & RADEON_RESET_VMC)
5393                 srbm_soft_reset |= SOFT_RESET_VMC;
5394
5395         if (!(rdev->flags & RADEON_IS_IGP)) {
5396                 if (reset_mask & RADEON_RESET_MC)
5397                         srbm_soft_reset |= SOFT_RESET_MC;
5398         }
5399
5400         if (grbm_soft_reset) {
5401                 tmp = RREG32(GRBM_SOFT_RESET);
5402                 tmp |= grbm_soft_reset;
5403                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5404                 WREG32(GRBM_SOFT_RESET, tmp);
5405                 tmp = RREG32(GRBM_SOFT_RESET);
5406
5407                 udelay(50);
5408
5409                 tmp &= ~grbm_soft_reset;
5410                 WREG32(GRBM_SOFT_RESET, tmp);
5411                 tmp = RREG32(GRBM_SOFT_RESET);
5412         }
5413
5414         if (srbm_soft_reset) {
5415                 tmp = RREG32(SRBM_SOFT_RESET);
5416                 tmp |= srbm_soft_reset;
5417                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5418                 WREG32(SRBM_SOFT_RESET, tmp);
5419                 tmp = RREG32(SRBM_SOFT_RESET);
5420
5421                 udelay(50);
5422
5423                 tmp &= ~srbm_soft_reset;
5424                 WREG32(SRBM_SOFT_RESET, tmp);
5425                 tmp = RREG32(SRBM_SOFT_RESET);
5426         }
5427
5428         /* Wait a little for things to settle down */
5429         udelay(50);
5430
5431         evergreen_mc_resume(rdev, &save);
5432         udelay(50);
5433
5434         cik_print_gpu_status_regs(rdev);
5435 }
5436
/* GMCON register state saved across a pci config reset on IGP (KV/KB) parts */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE at save time */
	u32 gmcon_misc;		/* GMCON_MISC at save time */
	u32 gmcon_misc3;	/* GMCON_MISC3 at save time */
};
5442
/**
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then clears the
 * RENG_EXECUTE_ON_PWR_UP, RENG_EXECUTE_ON_REG_UPDATE and STCTRL_STUTTER_EN
 * bits so those features stay off while the asic is reset.  Counterpart of
 * kv_restore_regs_for_reset().  Used on IGP parts only.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5454
/**
 * kv_restore_regs_for_reset - restore GMCON state after a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: register values captured by kv_save_regs_for_reset()
 *
 * Replays a fixed sequence of GMCON_PGFSM_CONFIG/GMCON_PGFSM_WRITE pairs
 * and then restores the saved GMCON registers.  The config/write values
 * are an opaque hardware-mandated sequence (presumably reprogramming the
 * power-gating state machine after reset — no public documentation);
 * do not reorder or coalesce these writes.  Used on IGP parts only.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* put back the registers captured before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5527
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the CP, MEC, SDMA and RLC engines, stops memory access,
 * saves GMCON state on IGP parts, then resets the asic through pci
 * config space and waits for it to come back (CONFIG_MEMSIZE reads
 * 0xffffffff while the asic is still in reset).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP parts lose GMCON state across the reset; save it first */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5590
5591 /**
5592  * cik_asic_reset - soft reset GPU
5593  *
5594  * @rdev: radeon_device pointer
5595  *
5596  * Look up which blocks are hung and attempt
5597  * to reset them.
5598  * Returns 0 for success.
5599  */
5600 int cik_asic_reset(struct radeon_device *rdev)
5601 {
5602         u32 reset_mask;
5603
5604         reset_mask = cik_gpu_check_soft_reset(rdev);
5605
5606         if (reset_mask)
5607                 r600_set_bios_scratch_engine_hung(rdev, true);
5608
5609         /* try soft reset */
5610         cik_gpu_soft_reset(rdev, reset_mask);
5611
5612         reset_mask = cik_gpu_check_soft_reset(rdev);
5613
5614         /* try pci config reset */
5615         if (reset_mask && radeon_hard_reset)
5616                 cik_gpu_pci_config_reset(rdev);
5617
5618         reset_mask = cik_gpu_check_soft_reset(rdev);
5619
5620         if (!reset_mask)
5621                 r600_set_bios_scratch_engine_hung(rdev, false);
5622
5623         return 0;
5624 }
5625
5626 /**
5627  * cik_gfx_is_lockup - check if the 3D engine is locked up
5628  *
5629  * @rdev: radeon_device pointer
5630  * @ring: radeon_ring structure holding ring information
5631  *
5632  * Check if the 3D engine is locked up (CIK).
5633  * Returns true if the engine is locked, false if not.
5634  */
5635 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5636 {
5637         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5638
5639         if (!(reset_mask & (RADEON_RESET_GFX |
5640                             RADEON_RESET_COMPUTE |
5641                             RADEON_RESET_CP))) {
5642                 radeon_ring_lockup_update(rdev, ring);
5643                 return false;
5644         }
5645         return radeon_ring_test_lockup(rdev, ring);
5646 }
5647
5648 /* MC */
5649 /**
5650  * cik_mc_program - program the GPU memory controller
5651  *
5652  * @rdev: radeon_device pointer
5653  *
5654  * Set the location of vram, gart, and AGP in the GPU's
5655  * physical address space (CIK).
5656  */
5657 static void cik_mc_program(struct radeon_device *rdev)
5658 {
5659         struct evergreen_mc_save save;
5660         u32 tmp;
5661         int i, j;
5662
5663         /* Initialize HDP */
5664         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5665                 WREG32((0x2c14 + j), 0x00000000);
5666                 WREG32((0x2c18 + j), 0x00000000);
5667                 WREG32((0x2c1c + j), 0x00000000);
5668                 WREG32((0x2c20 + j), 0x00000000);
5669                 WREG32((0x2c24 + j), 0x00000000);
5670         }
5671         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5672
5673         evergreen_mc_stop(rdev, &save);
5674         if (radeon_mc_wait_for_idle(rdev)) {
5675                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5676         }
5677         /* Lockout access through VGA aperture*/
5678         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5679         /* Update configuration */
5680         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5681                rdev->mc.vram_start >> 12);
5682         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5683                rdev->mc.vram_end >> 12);
5684         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5685                rdev->vram_scratch.gpu_addr >> 12);
5686         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5687         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5688         WREG32(MC_VM_FB_LOCATION, tmp);
5689         /* XXX double check these! */
5690         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5691         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5692         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5693         WREG32(MC_VM_AGP_BASE, 0);
5694         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5695         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5696         if (radeon_mc_wait_for_idle(rdev)) {
5697                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5698         }
5699         evergreen_mc_resume(rdev, &save);
5700         /* we need to own VRAM, so turn off the VGA renderer here
5701          * to stop it overwriting our objects */
5702         rv515_vga_render_disable(rdev);
5703 }
5704
5705 /**
5706  * cik_mc_init - initialize the memory controller driver params
5707  *
5708  * @rdev: radeon_device pointer
5709  *
5710  * Look up the amount of vram, vram width, and decide how to place
5711  * vram and gart within the GPU's physical address space (CIK).
5712  * Returns 0 for success.
5713  */
5714 static int cik_mc_init(struct radeon_device *rdev)
5715 {
5716         u32 tmp;
5717         int chansize, numchan;
5718
5719         /* Get VRAM informations */
5720         rdev->mc.vram_is_ddr = true;
5721         tmp = RREG32(MC_ARB_RAMCFG);
5722         if (tmp & CHANSIZE_MASK) {
5723                 chansize = 64;
5724         } else {
5725                 chansize = 32;
5726         }
5727         tmp = RREG32(MC_SHARED_CHMAP);
5728         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5729         case 0:
5730         default:
5731                 numchan = 1;
5732                 break;
5733         case 1:
5734                 numchan = 2;
5735                 break;
5736         case 2:
5737                 numchan = 4;
5738                 break;
5739         case 3:
5740                 numchan = 8;
5741                 break;
5742         case 4:
5743                 numchan = 3;
5744                 break;
5745         case 5:
5746                 numchan = 6;
5747                 break;
5748         case 6:
5749                 numchan = 10;
5750                 break;
5751         case 7:
5752                 numchan = 12;
5753                 break;
5754         case 8:
5755                 numchan = 16;
5756                 break;
5757         }
5758         rdev->mc.vram_width = numchan * chansize;
5759         /* Could aper size report 0 ? */
5760         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5761         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5762         /* size in MB on si */
5763         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5764         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5765         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5766         si_vram_gtt_location(rdev, &rdev->mc);
5767         radeon_update_bandwidth_info(rdev);
5768
5769         return 0;
5770 }
5771
5772 /*
5773  * GART
5774  * VMID 0 is the physical GPU addresses as used by the kernel.
5775  * VMIDs 1-15 are used for userspace clients and are handled
5776  * by the radeon vm/hsa code.
5777  */
5778 /**
5779  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5780  *
5781  * @rdev: radeon_device pointer
5782  *
5783  * Flush the TLB for the VMID 0 page table (CIK).
5784  */
5785 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5786 {
5787         /* flush hdp cache */
5788         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5789
5790         /* bits 0-15 are the VM contexts0-15 */
5791         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5792 }
5793
/**
 * cik_pcie_init_compute_vmid - init SH_MEM regs for the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-VMID SH_MEM registers for VMIDs 8-15 (the range
 * used by amdkfd, per cik_vm_init()).  The SH_MEM registers are banked
 * per VMID via SRBM, so srbm_mutex must be held around the selects.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* same base (0x6000, in 64KB pages) for private and shared apertures */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);	/* limit < base disables APE1 */
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* always restore the default (VMID 0) selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5815
5816 /**
5817  * cik_pcie_gart_enable - gart enable
5818  *
5819  * @rdev: radeon_device pointer
5820  *
5821  * This sets up the TLBs, programs the page tables for VMID0,
5822  * sets up the hw for VMIDs 1-15 which are allocated on
5823  * demand, and sets up the global locations for the LDS, GDS,
5824  * and GPUVM for FSA64 clients (CIK).
5825  * Returns 0 for success, errors for failure.
5826  */
5827 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5828 {
5829         int r, i;
5830
5831         if (rdev->gart.robj == NULL) {
5832                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5833                 return -EINVAL;
5834         }
5835         r = radeon_gart_table_vram_pin(rdev);
5836         if (r)
5837                 return r;
5838         /* Setup TLB control */
5839         WREG32(MC_VM_MX_L1_TLB_CNTL,
5840                (0xA << 7) |
5841                ENABLE_L1_TLB |
5842                ENABLE_L1_FRAGMENT_PROCESSING |
5843                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5844                ENABLE_ADVANCED_DRIVER_MODEL |
5845                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5846         /* Setup L2 cache */
5847         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5848                ENABLE_L2_FRAGMENT_PROCESSING |
5849                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5850                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5851                EFFECTIVE_L2_QUEUE_SIZE(7) |
5852                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5853         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5854         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5855                BANK_SELECT(4) |
5856                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5857         /* setup context0 */
5858         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5859         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5860         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5861         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5862                         (u32)(rdev->dummy_page.addr >> 12));
5863         WREG32(VM_CONTEXT0_CNTL2, 0);
5864         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5865                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5866
5867         WREG32(0x15D4, 0);
5868         WREG32(0x15D8, 0);
5869         WREG32(0x15DC, 0);
5870
5871         /* restore context1-15 */
5872         /* set vm size, must be a multiple of 4 */
5873         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5874         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5875         for (i = 1; i < 16; i++) {
5876                 if (i < 8)
5877                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5878                                rdev->vm_manager.saved_table_addr[i]);
5879                 else
5880                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5881                                rdev->vm_manager.saved_table_addr[i]);
5882         }
5883
5884         /* enable context1-15 */
5885         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5886                (u32)(rdev->dummy_page.addr >> 12));
5887         WREG32(VM_CONTEXT1_CNTL2, 4);
5888         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5889                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5890                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5891                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5892                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5893                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5894                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5895                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5896                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5897                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5898                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5899                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5900                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5901                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5902
5903         if (rdev->family == CHIP_KAVERI) {
5904                 u32 tmp = RREG32(CHUB_CONTROL);
5905                 tmp &= ~BYPASS_VM;
5906                 WREG32(CHUB_CONTROL, tmp);
5907         }
5908
5909         /* XXX SH_MEM regs */
5910         /* where to put LDS, scratch, GPUVM in FSA64 space */
5911         mutex_lock(&rdev->srbm_mutex);
5912         for (i = 0; i < 16; i++) {
5913                 cik_srbm_select(rdev, 0, 0, 0, i);
5914                 /* CP and shaders */
5915                 WREG32(SH_MEM_CONFIG, 0);
5916                 WREG32(SH_MEM_APE1_BASE, 1);
5917                 WREG32(SH_MEM_APE1_LIMIT, 0);
5918                 WREG32(SH_MEM_BASES, 0);
5919                 /* SDMA GFX */
5920                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5921                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5922                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5923                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5924                 /* XXX SDMA RLC - todo */
5925         }
5926         cik_srbm_select(rdev, 0, 0, 0, 0);
5927         mutex_unlock(&rdev->srbm_mutex);
5928
5929         cik_pcie_init_compute_vmid(rdev);
5930
5931         cik_pcie_gart_tlb_flush(rdev);
5932         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5933                  (unsigned)(rdev->mc.gtt_size >> 20),
5934                  (unsigned long long)rdev->gart.table_addr);
5935         rdev->gart.ready = true;
5936         return 0;
5937 }
5938
5939 /**
5940  * cik_pcie_gart_disable - gart disable
5941  *
5942  * @rdev: radeon_device pointer
5943  *
5944  * This disables all VM page table (CIK).
5945  */
5946 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5947 {
5948         unsigned i;
5949
5950         for (i = 1; i < 16; ++i) {
5951                 uint32_t reg;
5952                 if (i < 8)
5953                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5954                 else
5955                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5956                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5957         }
5958
5959         /* Disable all tables */
5960         WREG32(VM_CONTEXT0_CNTL, 0);
5961         WREG32(VM_CONTEXT1_CNTL, 0);
5962         /* Setup TLB control */
5963         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5964                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5965         /* Setup L2 cache */
5966         WREG32(VM_L2_CNTL,
5967                ENABLE_L2_FRAGMENT_PROCESSING |
5968                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5969                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5970                EFFECTIVE_L2_QUEUE_SIZE(7) |
5971                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5972         WREG32(VM_L2_CNTL2, 0);
5973         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5974                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5975         radeon_gart_table_vram_unpin(rdev);
5976 }
5977
5978 /**
5979  * cik_pcie_gart_fini - vm fini callback
5980  *
5981  * @rdev: radeon_device pointer
5982  *
5983  * Tears down the driver GART/VM setup (CIK).
5984  */
5985 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5986 {
5987         cik_pcie_gart_disable(rdev);
5988         radeon_gart_table_vram_free(rdev);
5989         radeon_gart_fini(rdev);
5990 }
5991
5992 /* vm parser */
5993 /**
5994  * cik_ib_parse - vm ib_parse callback
5995  *
5996  * @rdev: radeon_device pointer
5997  * @ib: indirect buffer pointer
5998  *
5999  * CIK uses hw IB checking so this is a nop (CIK).
6000  */
6001 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6002 {
6003         return 0;
6004 }
6005
6006 /*
6007  * vm
6008  * VMID 0 is the physical GPU addresses as used by the kernel.
6009  * VMIDs 1-15 are used for userspace clients and are handled
6010  * by the radeon vm/hsa code.
6011  */
6012 /**
6013  * cik_vm_init - cik vm init callback
6014  *
6015  * @rdev: radeon_device pointer
6016  *
6017  * Inits cik specific vm parameters (number of VMs, base of vram for
6018  * VMIDs 1-15) (CIK).
6019  * Returns 0 for success.
6020  */
6021 int cik_vm_init(struct radeon_device *rdev)
6022 {
6023         /*
6024          * number of VMs
6025          * VMID 0 is reserved for System
6026          * radeon graphics/compute will use VMIDs 1-7
6027          * amdkfd will use VMIDs 8-15
6028          */
6029         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6030         /* base offset of vram pages */
6031         if (rdev->flags & RADEON_IS_IGP) {
6032                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
6033                 tmp <<= 22;
6034                 rdev->vm_manager.vram_base_offset = tmp;
6035         } else
6036                 rdev->vm_manager.vram_base_offset = 0;
6037
6038         return 0;
6039 }
6040
6041 /**
6042  * cik_vm_fini - cik vm fini callback
6043  *
6044  * @rdev: radeon_device pointer
6045  *
6046  * Tear down any asic specific VM setup (CIK).
6047  */
6048 void cik_vm_fini(struct radeon_device *rdev)
6049 {
6050 }
6051
6052 /**
6053  * cik_vm_decode_fault - print human readable fault info
6054  *
6055  * @rdev: radeon_device pointer
6056  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6057  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6058  *
6059  * Print human readable fault information (CIK).
6060  */
6061 static void cik_vm_decode_fault(struct radeon_device *rdev,
6062                                 u32 status, u32 addr, u32 mc_client)
6063 {
6064         u32 mc_id;
6065         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6066         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6067         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6068                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6069
6070         if (rdev->family == CHIP_HAWAII)
6071                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6072         else
6073                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6074
6075         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6076                protections, vmid, addr,
6077                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6078                block, mc_client, mc_id);
6079 }
6080
6081 /**
6082  * cik_vm_flush - cik vm flush using the CP
6083  *
6084  * @rdev: radeon_device pointer
6085  *
6086  * Update the page table base and flush the VM TLB
6087  * using the CP (CIK).
6088  */
6089 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6090                   unsigned vm_id, uint64_t pd_addr)
6091 {
6092         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6093
6094         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6095         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6096                                  WRITE_DATA_DST_SEL(0)));
6097         if (vm_id < 8) {
6098                 radeon_ring_write(ring,
6099                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6100         } else {
6101                 radeon_ring_write(ring,
6102                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6103         }
6104         radeon_ring_write(ring, 0);
6105         radeon_ring_write(ring, pd_addr >> 12);
6106
6107         /* update SH_MEM_* regs */
6108         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6109         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6110                                  WRITE_DATA_DST_SEL(0)));
6111         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6112         radeon_ring_write(ring, 0);
6113         radeon_ring_write(ring, VMID(vm_id));
6114
6115         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6116         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6117                                  WRITE_DATA_DST_SEL(0)));
6118         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6119         radeon_ring_write(ring, 0);
6120
6121         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6122         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6123         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6124         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6125
6126         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6127         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6128                                  WRITE_DATA_DST_SEL(0)));
6129         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6130         radeon_ring_write(ring, 0);
6131         radeon_ring_write(ring, VMID(0));
6132
6133         /* HDP flush */
6134         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6135
6136         /* bits 0-15 are the VM contexts0-15 */
6137         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6138         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6139                                  WRITE_DATA_DST_SEL(0)));
6140         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6141         radeon_ring_write(ring, 0);
6142         radeon_ring_write(ring, 1 << vm_id);
6143
6144         /* wait for the invalidate to complete */
6145         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6146         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6147                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6148                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6149         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6150         radeon_ring_write(ring, 0);
6151         radeon_ring_write(ring, 0); /* ref */
6152         radeon_ring_write(ring, 0); /* mask */
6153         radeon_ring_write(ring, 0x20); /* poll interval */
6154
6155         /* compute doesn't have PFP */
6156         if (usepfp) {
6157                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6158                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6159                 radeon_ring_write(ring, 0x0);
6160         }
6161 }
6162
6163 /*
6164  * RLC
6165  * The RLC is a multi-purpose microengine that handles a
6166  * variety of functions, the most important of which is
6167  * the interrupt controller.
6168  */
6169 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6170                                           bool enable)
6171 {
6172         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6173
6174         if (enable)
6175                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6176         else
6177                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6178         WREG32(CP_INT_CNTL_RING0, tmp);
6179 }
6180
6181 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6182 {
6183         u32 tmp;
6184
6185         tmp = RREG32(RLC_LB_CNTL);
6186         if (enable)
6187                 tmp |= LOAD_BALANCE_ENABLE;
6188         else
6189                 tmp &= ~LOAD_BALANCE_ENABLE;
6190         WREG32(RLC_LB_CNTL, tmp);
6191 }
6192
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH (selected via the
 * GRBM index, hence grbm_idx_mutex), then polls the non-CU masters.
 * Each poll gives up after rdev->usec_timeout iterations; timeouts
 * are silently ignored.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SE/SH) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6219
6220 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6221 {
6222         u32 tmp;
6223
6224         tmp = RREG32(RLC_CNTL);
6225         if (tmp != rlc)
6226                 WREG32(RLC_CNTL, rlc);
6227 }
6228
/*
 * Halt the RLC ME if it is currently enabled and wait for it to
 * quiesce.  Returns the original RLC_CNTL value so the caller can
 * restore the previous state afterwards via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
        u32 data, orig;

        orig = data = RREG32(RLC_CNTL);

        if (data & RLC_ENABLE) {
                u32 i;

                data &= ~RLC_ENABLE;
                WREG32(RLC_CNTL, data);

                /* wait (bounded by usec_timeout) for the GPM block to idle */
                for (i = 0; i < rdev->usec_timeout; i++) {
                        if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
                                break;
                        udelay(1);
                }

                /* and for the serdes masters to drain as well */
                cik_wait_for_rlc_serdes(rdev);
        }

        return orig;
}
6252
/*
 * Ask the RLC to enter safe mode by posting MSG_ENTER_RLC_SAFE_MODE
 * with the REQ bit set, then poll (bounded by rdev->usec_timeout)
 * first for the GFX power/clock status bits to assert and then for
 * the RLC to acknowledge by clearing REQ.  Times out silently.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
        u32 tmp, i, mask;

        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
        WREG32(RLC_GPR_REG2, tmp);

        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
                        break;
                udelay(1);
        }

        /* wait for the RLC to clear the request bit (ack) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
                        break;
                udelay(1);
        }
}
6273
6274 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6275 {
6276         u32 tmp;
6277
6278         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6279         WREG32(RLC_GPR_REG2, tmp);
6280 }
6281
6282 /**
6283  * cik_rlc_stop - stop the RLC ME
6284  *
6285  * @rdev: radeon_device pointer
6286  *
6287  * Halt the RLC ME (MicroEngine) (CIK).
6288  */
6289 static void cik_rlc_stop(struct radeon_device *rdev)
6290 {
6291         WREG32(RLC_CNTL, 0);
6292
6293         cik_enable_gui_idle_interrupt(rdev, false);
6294
6295         cik_wait_for_rlc_serdes(rdev);
6296 }
6297
6298 /**
6299  * cik_rlc_start - start the RLC ME
6300  *
6301  * @rdev: radeon_device pointer
6302  *
6303  * Unhalt the RLC ME (MicroEngine) (CIK).
6304  */
6305 static void cik_rlc_start(struct radeon_device *rdev)
6306 {
6307         WREG32(RLC_CNTL, RLC_ENABLE);
6308
6309         cik_enable_gui_idle_interrupt(rdev, true);
6310
6311         udelay(50);
6312 }
6313
6314 /**
6315  * cik_rlc_resume - setup the RLC hw
6316  *
6317  * @rdev: radeon_device pointer
6318  *
6319  * Initialize the RLC registers, load the ucode,
6320  * and start the RLC (CIK).
6321  * Returns 0 for success, -EINVAL if the ucode is not available.
6322  */
6323 static int cik_rlc_resume(struct radeon_device *rdev)
6324 {
6325         u32 i, size, tmp;
6326
6327         if (!rdev->rlc_fw)
6328                 return -EINVAL;
6329
6330         cik_rlc_stop(rdev);
6331
6332         /* disable CG */
6333         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6334         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6335
6336         si_rlc_reset(rdev);
6337
6338         cik_init_pg(rdev);
6339
6340         cik_init_cg(rdev);
6341
6342         WREG32(RLC_LB_CNTR_INIT, 0);
6343         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6344
6345         mutex_lock(&rdev->grbm_idx_mutex);
6346         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6347         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6348         WREG32(RLC_LB_PARAMS, 0x00600408);
6349         WREG32(RLC_LB_CNTL, 0x80000004);
6350         mutex_unlock(&rdev->grbm_idx_mutex);
6351
6352         WREG32(RLC_MC_CNTL, 0);
6353         WREG32(RLC_UCODE_CNTL, 0);
6354
6355         if (rdev->new_fw) {
6356                 const struct rlc_firmware_header_v1_0 *hdr =
6357                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6358                 const __le32 *fw_data = (const __le32 *)
6359                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6360
6361                 radeon_ucode_print_rlc_hdr(&hdr->header);
6362
6363                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6364                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6365                 for (i = 0; i < size; i++)
6366                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6367                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6368         } else {
6369                 const __be32 *fw_data;
6370
6371                 switch (rdev->family) {
6372                 case CHIP_BONAIRE:
6373                 case CHIP_HAWAII:
6374                 default:
6375                         size = BONAIRE_RLC_UCODE_SIZE;
6376                         break;
6377                 case CHIP_KAVERI:
6378                         size = KV_RLC_UCODE_SIZE;
6379                         break;
6380                 case CHIP_KABINI:
6381                         size = KB_RLC_UCODE_SIZE;
6382                         break;
6383                 case CHIP_MULLINS:
6384                         size = ML_RLC_UCODE_SIZE;
6385                         break;
6386                 }
6387
6388                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6389                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6390                 for (i = 0; i < size; i++)
6391                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6392                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6393         }
6394
6395         /* XXX - find out what chips support lbpw */
6396         cik_enable_lbpw(rdev, false);
6397
6398         if (rdev->family == CHIP_BONAIRE)
6399                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6400
6401         cik_rlc_start(rdev);
6402
6403         return 0;
6404 }
6405
/*
 * Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block.  The serdes override is
 * programmed with the RLC halted, then the previous RLC state is
 * restored.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the CGCG override to all serdes masters */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): repeated dummy reads - presumably to flush
                 * outstanding writes / let clocks settle before disabling
                 * CGCG; confirm against the hw docs before touching. */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6443
/*
 * Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * including the optional CP/RLC memory light-sleep and CGTS features
 * gated by the corresponding cg_flags.  Serdes overrides are written
 * with the RLC halted and its state restored afterwards.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                /* CP memory light sleep, when both MGLS and CP_LS are set */
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the MGCG override to all serdes masters */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC state saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force the overrides on and turn off memory light sleep */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6527
/* MC clock-gating control registers toggled as a set by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6540
6541 static void cik_enable_mc_ls(struct radeon_device *rdev,
6542                              bool enable)
6543 {
6544         int i;
6545         u32 orig, data;
6546
6547         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6548                 orig = data = RREG32(mc_cg_registers[i]);
6549                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6550                         data |= MC_LS_ENABLE;
6551                 else
6552                         data &= ~MC_LS_ENABLE;
6553                 if (data != orig)
6554                         WREG32(mc_cg_registers[i], data);
6555         }
6556 }
6557
6558 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6559                                bool enable)
6560 {
6561         int i;
6562         u32 orig, data;
6563
6564         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6565                 orig = data = RREG32(mc_cg_registers[i]);
6566                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6567                         data |= MC_CG_ENABLE;
6568                 else
6569                         data &= ~MC_CG_ENABLE;
6570                 if (data != orig)
6571                         WREG32(mc_cg_registers[i], data);
6572         }
6573 }
6574
6575 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6576                                  bool enable)
6577 {
6578         u32 orig, data;
6579
6580         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6581                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6582                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6583         } else {
6584                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6585                 data |= 0xff000000;
6586                 if (data != orig)
6587                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6588
6589                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6590                 data |= 0xff000000;
6591                 if (data != orig)
6592                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6593         }
6594 }
6595
6596 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6597                                  bool enable)
6598 {
6599         u32 orig, data;
6600
6601         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6602                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6603                 data |= 0x100;
6604                 if (orig != data)
6605                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6606
6607                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6608                 data |= 0x100;
6609                 if (orig != data)
6610                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6611         } else {
6612                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6613                 data &= ~0x100;
6614                 if (orig != data)
6615                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6616
6617                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6618                 data &= ~0x100;
6619                 if (orig != data)
6620                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6621         }
6622 }
6623
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                /* NOTE(review): the value read here is immediately
                 * overwritten; the read may only matter for its side
                 * effect on the indirect UVD context interface - confirm
                 * before removing. */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6649
6650 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6651                                bool enable)
6652 {
6653         u32 orig, data;
6654
6655         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6656
6657         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6658                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6659                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6660         else
6661                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6662                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6663
6664         if (orig != data)
6665                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6666 }
6667
6668 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6669                                 bool enable)
6670 {
6671         u32 orig, data;
6672
6673         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6674
6675         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6676                 data &= ~CLOCK_GATING_DIS;
6677         else
6678                 data |= CLOCK_GATING_DIS;
6679
6680         if (orig != data)
6681                 WREG32(HDP_HOST_PATH_CNTL, data);
6682 }
6683
6684 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6685                               bool enable)
6686 {
6687         u32 orig, data;
6688
6689         orig = data = RREG32(HDP_MEM_POWER_LS);
6690
6691         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6692                 data |= HDP_LS_ENABLE;
6693         else
6694                 data &= ~HDP_LS_ENABLE;
6695
6696         if (orig != data)
6697                 WREG32(HDP_MEM_POWER_LS, data);
6698 }
6699
/*
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
                   u32 block, bool enable)
{

        if (block & RADEON_CG_BLOCK_GFX) {
                /* suppress the GUI idle interrupt while reprogramming */
                cik_enable_gui_idle_interrupt(rdev, false);
                /* order matters! */
                if (enable) {
                        cik_enable_mgcg(rdev, true);
                        cik_enable_cgcg(rdev, true);
                } else {
                        cik_enable_cgcg(rdev, false);
                        cik_enable_mgcg(rdev, false);
                }
                cik_enable_gui_idle_interrupt(rdev, true);
        }

        if (block & RADEON_CG_BLOCK_MC) {
                /* MC gating is skipped on IGPs */
                if (!(rdev->flags & RADEON_IS_IGP)) {
                        cik_enable_mc_mgcg(rdev, enable);
                        cik_enable_mc_ls(rdev, enable);
                }
        }

        if (block & RADEON_CG_BLOCK_SDMA) {
                cik_enable_sdma_mgcg(rdev, enable);
                cik_enable_sdma_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_BIF) {
                cik_enable_bif_mgls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_UVD) {
                if (rdev->has_uvd)
                        cik_enable_uvd_mgcg(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_HDP) {
                cik_enable_hdp_mgcg(rdev, enable);
                cik_enable_hdp_ls(rdev, enable);
        }

        if (block & RADEON_CG_BLOCK_VCE) {
                vce_v2_0_enable_mgcg(rdev, enable);
        }
}
6747
/*
 * Enable clock gating for all supported blocks: GFX first, then the
 * remaining blocks in one batch (mirrored in reverse by cik_fini_cg()).
 */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
6762
/*
 * Disable clock gating in the reverse order of cik_init_cg():
 * the non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6773
6774 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6775                                           bool enable)
6776 {
6777         u32 data, orig;
6778
6779         orig = data = RREG32(RLC_PG_CNTL);
6780         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6781                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6782         else
6783                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6784         if (orig != data)
6785                 WREG32(RLC_PG_CNTL, data);
6786 }
6787
6788 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6789                                           bool enable)
6790 {
6791         u32 data, orig;
6792
6793         orig = data = RREG32(RLC_PG_CNTL);
6794         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6795                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6796         else
6797                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6798         if (orig != data)
6799                 WREG32(RLC_PG_CNTL, data);
6800 }
6801
6802 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6803 {
6804         u32 data, orig;
6805
6806         orig = data = RREG32(RLC_PG_CNTL);
6807         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6808                 data &= ~DISABLE_CP_PG;
6809         else
6810                 data |= DISABLE_CP_PG;
6811         if (orig != data)
6812                 WREG32(RLC_PG_CNTL, data);
6813 }
6814
6815 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6816 {
6817         u32 data, orig;
6818
6819         orig = data = RREG32(RLC_PG_CNTL);
6820         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6821                 data &= ~DISABLE_GDS_PG;
6822         else
6823                 data |= DISABLE_GDS_PG;
6824         if (orig != data)
6825                 WREG32(RLC_PG_CNTL, data);
6826 }
6827
6828 #define CP_ME_TABLE_SIZE    96
6829 #define CP_ME_TABLE_OFFSET  2048
6830 #define CP_MEC_TABLE_OFFSET 4096
6831
/*
 * cik_init_cp_pg_table - copy the CP micro engine jump tables into
 * the RLC cp_table buffer (used for CP power gating restore).
 *
 * One table is written per micro engine: CE, PFP, ME, MEC - plus MEC2
 * on Kaveri (hence max_me = 5 there).  With new-style firmware the
 * table offset/size come from the gfx firmware header; legacy firmware
 * uses the fixed CP_ME_TABLE_*/CP_MEC_TABLE_OFFSET constants.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        /* pick the firmware image for this engine:
                         * 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2 */
                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy firmware: fixed table size and offsets */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6915
/* Enable/disable GFX power gating and the RLC auto power-gate control. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): read result is discarded - likely a
                 * posting/flush read after disabling PG; confirm before
                 * removing. */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6945
/*
 * Return a bitmap of the active CUs for the given SE/SH, one bit per
 * CU up to max_cu_per_sh.  The disable bits from the fixed
 * (CC_GC_SHADER_ARRAY_CONFIG) and user (GC_USER_SHADER_ARRAY_CONFIG)
 * configs are merged - they appear to live in the upper 16 bits
 * (TODO confirm against the register spec) - then inverted and masked.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
        u32 mask = 0, tmp, tmp1;
        int i;

        mutex_lock(&rdev->grbm_idx_mutex);
        cik_select_se_sh(rdev, se, sh);
        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
        /* back to broadcast mode before releasing the lock */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
        mutex_unlock(&rdev->grbm_idx_mutex);

        tmp &= 0xffff0000;

        tmp |= tmp1;
        tmp >>= 16;

        /* build a mask of max_cu_per_sh low bits */
        for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
                mask <<= 1;
                mask |= 1;
        }

        return (~tmp) & mask;
}
6970
6971 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6972 {
6973         u32 i, j, k, active_cu_number = 0;
6974         u32 mask, counter, cu_bitmap;
6975         u32 tmp = 0;
6976
6977         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6978                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6979                         mask = 1;
6980                         cu_bitmap = 0;
6981                         counter = 0;
6982                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6983                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6984                                         if (counter < 2)
6985                                                 cu_bitmap |= mask;
6986                                         counter ++;
6987                                 }
6988                                 mask <<= 1;
6989                         }
6990
6991                         active_cu_number += counter;
6992                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6993                 }
6994         }
6995
6996         WREG32(RLC_PG_AO_CU_MASK, tmp);
6997
6998         tmp = RREG32(RLC_MAX_PG_CU);
6999         tmp &= ~MAX_PU_CU_MASK;
7000         tmp |= MAX_PU_CU(active_cu_number);
7001         WREG32(RLC_MAX_PG_CU, tmp);
7002 }
7003
7004 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7005                                        bool enable)
7006 {
7007         u32 data, orig;
7008
7009         orig = data = RREG32(RLC_PG_CNTL);
7010         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7011                 data |= STATIC_PER_CU_PG_ENABLE;
7012         else
7013                 data &= ~STATIC_PER_CU_PG_ENABLE;
7014         if (orig != data)
7015                 WREG32(RLC_PG_CNTL, data);
7016 }
7017
7018 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7019                                         bool enable)
7020 {
7021         u32 data, orig;
7022
7023         orig = data = RREG32(RLC_PG_CNTL);
7024         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7025                 data |= DYN_PER_CU_PG_ENABLE;
7026         else
7027                 data &= ~DYN_PER_CU_PG_ENABLE;
7028         if (orig != data)
7029                 WREG32(RLC_PG_CNTL, data);
7030 }
7031
7032 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7033 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7034
/*
 * Initialize the RLC GFX power-gating state: program the clear-state
 * descriptor and save/restore register list into RLC scratch space,
 * point the RLC at the save/restore and CP table buffers, and set up
 * the poll-count and power-gating delay parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* clear state descriptor: hi/lo GPU address + size */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear-state buffer: zero out the descriptor */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                /* upload the save/restore register list */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        /* low byte of RLC_PG_DELAY_2 set to 3 */
        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
7083
/**
 * cik_update_gfx_pg - enable/disable all gfx power gating features
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable power gating
 *
 * Toggles coarse-grain PG, then static and dynamic per-CU
 * medium-grain PG.  Each helper checks the relevant pg_flags bit
 * itself, so this is safe to call regardless of ASIC support.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7090
7091 u32 cik_get_csb_size(struct radeon_device *rdev)
7092 {
7093         u32 count = 0;
7094         const struct cs_section_def *sect = NULL;
7095         const struct cs_extent_def *ext = NULL;
7096
7097         if (rdev->rlc.cs_data == NULL)
7098                 return 0;
7099
7100         /* begin clear state */
7101         count += 2;
7102         /* context control state */
7103         count += 3;
7104
7105         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7106                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7107                         if (sect->id == SECT_CONTEXT)
7108                                 count += 2 + ext->reg_count;
7109                         else
7110                                 return 0;
7111                 }
7112         }
7113         /* pa_sc_raster_config/pa_sc_raster_config1 */
7114         count += 4;
7115         /* end clear state */
7116         count += 2;
7117         /* clear state */
7118         count += 2;
7119
7120         return count;
7121 }
7122
7123 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7124 {
7125         u32 count = 0, i;
7126         const struct cs_section_def *sect = NULL;
7127         const struct cs_extent_def *ext = NULL;
7128
7129         if (rdev->rlc.cs_data == NULL)
7130                 return;
7131         if (buffer == NULL)
7132                 return;
7133
7134         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7135         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7136
7137         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7138         buffer[count++] = cpu_to_le32(0x80000000);
7139         buffer[count++] = cpu_to_le32(0x80000000);
7140
7141         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7142                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7143                         if (sect->id == SECT_CONTEXT) {
7144                                 buffer[count++] =
7145                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7146                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7147                                 for (i = 0; i < ext->reg_count; i++)
7148                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7149                         } else {
7150                                 return;
7151                         }
7152                 }
7153         }
7154
7155         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7156         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7157         switch (rdev->family) {
7158         case CHIP_BONAIRE:
7159                 buffer[count++] = cpu_to_le32(0x16000012);
7160                 buffer[count++] = cpu_to_le32(0x00000000);
7161                 break;
7162         case CHIP_KAVERI:
7163                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7164                 buffer[count++] = cpu_to_le32(0x00000000);
7165                 break;
7166         case CHIP_KABINI:
7167         case CHIP_MULLINS:
7168                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7169                 buffer[count++] = cpu_to_le32(0x00000000);
7170                 break;
7171         case CHIP_HAWAII:
7172                 buffer[count++] = cpu_to_le32(0x3a00161a);
7173                 buffer[count++] = cpu_to_le32(0x0000002e);
7174                 break;
7175         default:
7176                 buffer[count++] = cpu_to_le32(0x00000000);
7177                 buffer[count++] = cpu_to_le32(0x00000000);
7178                 break;
7179         }
7180
7181         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7182         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7183
7184         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7185         buffer[count++] = cpu_to_le32(0);
7186 }
7187
7188 static void cik_init_pg(struct radeon_device *rdev)
7189 {
7190         if (rdev->pg_flags) {
7191                 cik_enable_sck_slowdown_on_pu(rdev, true);
7192                 cik_enable_sck_slowdown_on_pd(rdev, true);
7193                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7194                         cik_init_gfx_cgpg(rdev);
7195                         cik_enable_cp_pg(rdev, true);
7196                         cik_enable_gds_pg(rdev, true);
7197                 }
7198                 cik_init_ao_cu_mask(rdev);
7199                 cik_update_gfx_pg(rdev, true);
7200         }
7201 }
7202
7203 static void cik_fini_pg(struct radeon_device *rdev)
7204 {
7205         if (rdev->pg_flags) {
7206                 cik_update_gfx_pg(rdev, false);
7207                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7208                         cik_enable_cp_pg(rdev, false);
7209                         cik_enable_gds_pg(rdev, false);
7210                 }
7211         }
7212 }
7213
7214 /*
7215  * Interrupts
7216  * Starting with r6xx, interrupts are handled via a ring buffer.
7217  * Ring buffers are areas of GPU accessible memory that the GPU
7218  * writes interrupt vectors into and the host reads vectors out of.
7219  * There is a rptr (read pointer) that determines where the
7220  * host is currently reading, and a wptr (write pointer)
7221  * which determines where the GPU has written.  When the
7222  * pointers are equal, the ring is idle.  When the GPU
7223  * writes vectors to the ring buffer, it increments the
7224  * wptr.  When there is an interrupt, the host then starts
7225  * fetching commands and processing them until the pointers are
7226  * equal again at which point it updates the rptr.
7227  */
7228
7229 /**
7230  * cik_enable_interrupts - Enable the interrupt ring buffer
7231  *
7232  * @rdev: radeon_device pointer
7233  *
7234  * Enable the interrupt ring buffer (CIK).
7235  */
7236 static void cik_enable_interrupts(struct radeon_device *rdev)
7237 {
7238         u32 ih_cntl = RREG32(IH_CNTL);
7239         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7240
7241         ih_cntl |= ENABLE_INTR;
7242         ih_rb_cntl |= IH_RB_ENABLE;
7243         WREG32(IH_CNTL, ih_cntl);
7244         WREG32(IH_RB_CNTL, ih_rb_cntl);
7245         rdev->ih.enabled = true;
7246 }
7247
7248 /**
7249  * cik_disable_interrupts - Disable the interrupt ring buffer
7250  *
7251  * @rdev: radeon_device pointer
7252  *
7253  * Disable the interrupt ring buffer (CIK).
7254  */
7255 static void cik_disable_interrupts(struct radeon_device *rdev)
7256 {
7257         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7258         u32 ih_cntl = RREG32(IH_CNTL);
7259
7260         ih_rb_cntl &= ~IH_RB_ENABLE;
7261         ih_cntl &= ~ENABLE_INTR;
7262         WREG32(IH_RB_CNTL, ih_rb_cntl);
7263         WREG32(IH_CNTL, ih_cntl);
7264         /* set rptr, wptr to 0 */
7265         WREG32(IH_RB_RPTR, 0);
7266         WREG32(IH_RB_WPTR, 0);
7267         rdev->ih.enabled = false;
7268         rdev->ih.rptr = 0;
7269 }
7270
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty throttling bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: MEC1/MEC2, pipes 0-3 */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve the polarity bit, clear the enables */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7347
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up; free the IH ring again */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7429
7430 /**
7431  * cik_irq_set - enable/disable interrupt sources
7432  *
7433  * @rdev: radeon_device pointer
7434  *
7435  * Enable interrupt sources on the GPU (vblanks, hpd,
7436  * etc.) (CIK).
7437  * Returns 0 for success, errors for failure.
7438  */
7439 int cik_irq_set(struct radeon_device *rdev)
7440 {
7441         u32 cp_int_cntl;
7442         u32 cp_m1p0;
7443         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7444         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7445         u32 grbm_int_cntl = 0;
7446         u32 dma_cntl, dma_cntl1;
7447
7448         if (!rdev->irq.installed) {
7449                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7450                 return -EINVAL;
7451         }
7452         /* don't enable anything if the ih is disabled */
7453         if (!rdev->ih.enabled) {
7454                 cik_disable_interrupts(rdev);
7455                 /* force the active interrupt state to all disabled */
7456                 cik_disable_interrupt_state(rdev);
7457                 return 0;
7458         }
7459
7460         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7461                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7462         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7463
7464         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7465         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7466         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7467         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7468         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7469         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7470
7471         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7472         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7473
7474         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7475
7476         /* enable CP interrupts on all rings */
7477         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7478                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7479                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7480         }
7481         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7482                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7483                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7484                 if (ring->me == 1) {
7485                         switch (ring->pipe) {
7486                         case 0:
7487                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7488                                 break;
7489                         default:
7490                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7491                                 break;
7492                         }
7493                 } else {
7494                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7495                 }
7496         }
7497         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7498                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7499                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7500                 if (ring->me == 1) {
7501                         switch (ring->pipe) {
7502                         case 0:
7503                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7504                                 break;
7505                         default:
7506                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7507                                 break;
7508                         }
7509                 } else {
7510                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7511                 }
7512         }
7513
7514         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7515                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7516                 dma_cntl |= TRAP_ENABLE;
7517         }
7518
7519         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7520                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7521                 dma_cntl1 |= TRAP_ENABLE;
7522         }
7523
7524         if (rdev->irq.crtc_vblank_int[0] ||
7525             atomic_read(&rdev->irq.pflip[0])) {
7526                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7527                 crtc1 |= VBLANK_INTERRUPT_MASK;
7528         }
7529         if (rdev->irq.crtc_vblank_int[1] ||
7530             atomic_read(&rdev->irq.pflip[1])) {
7531                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7532                 crtc2 |= VBLANK_INTERRUPT_MASK;
7533         }
7534         if (rdev->irq.crtc_vblank_int[2] ||
7535             atomic_read(&rdev->irq.pflip[2])) {
7536                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7537                 crtc3 |= VBLANK_INTERRUPT_MASK;
7538         }
7539         if (rdev->irq.crtc_vblank_int[3] ||
7540             atomic_read(&rdev->irq.pflip[3])) {
7541                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7542                 crtc4 |= VBLANK_INTERRUPT_MASK;
7543         }
7544         if (rdev->irq.crtc_vblank_int[4] ||
7545             atomic_read(&rdev->irq.pflip[4])) {
7546                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7547                 crtc5 |= VBLANK_INTERRUPT_MASK;
7548         }
7549         if (rdev->irq.crtc_vblank_int[5] ||
7550             atomic_read(&rdev->irq.pflip[5])) {
7551                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7552                 crtc6 |= VBLANK_INTERRUPT_MASK;
7553         }
7554         if (rdev->irq.hpd[0]) {
7555                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7556                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7557         }
7558         if (rdev->irq.hpd[1]) {
7559                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7560                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7561         }
7562         if (rdev->irq.hpd[2]) {
7563                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7564                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7565         }
7566         if (rdev->irq.hpd[3]) {
7567                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7568                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7569         }
7570         if (rdev->irq.hpd[4]) {
7571                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7572                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7573         }
7574         if (rdev->irq.hpd[5]) {
7575                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7576                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7577         }
7578
7579         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7580
7581         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7582         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7583
7584         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7585
7586         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7587
7588         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7589         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7590         if (rdev->num_crtc >= 4) {
7591                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7592                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7593         }
7594         if (rdev->num_crtc >= 6) {
7595                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7596                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7597         }
7598
7599         if (rdev->num_crtc >= 2) {
7600                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7601                        GRPH_PFLIP_INT_MASK);
7602                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7603                        GRPH_PFLIP_INT_MASK);
7604         }
7605         if (rdev->num_crtc >= 4) {
7606                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7607                        GRPH_PFLIP_INT_MASK);
7608                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7609                        GRPH_PFLIP_INT_MASK);
7610         }
7611         if (rdev->num_crtc >= 6) {
7612                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7613                        GRPH_PFLIP_INT_MASK);
7614                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7615                        GRPH_PFLIP_INT_MASK);
7616         }
7617
7618         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7619         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7620         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7621         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7622         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7623         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7624
7625         /* posting read */
7626         RREG32(SRBM_STATUS);
7627
7628         return 0;
7629 }
7630
7631 /**
7632  * cik_irq_ack - ack interrupt sources
7633  *
7634  * @rdev: radeon_device pointer
7635  *
7636  * Ack interrupt sources on the GPU (vblanks, hpd,
7637  * etc.) (CIK).  Certain interrupts sources are sw
7638  * generated and do not require an explicit ack.
7639  */
7640 static inline void cik_irq_ack(struct radeon_device *rdev)
7641 {
7642         u32 tmp;
7643
7644         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7645         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7646         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7647         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7648         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7649         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7650         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7651
7652         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7653                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7654         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7655                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7656         if (rdev->num_crtc >= 4) {
7657                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7658                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7659                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7660                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7661         }
7662         if (rdev->num_crtc >= 6) {
7663                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7664                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7665                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7666                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7667         }
7668
7669         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7670                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7671                        GRPH_PFLIP_INT_CLEAR);
7672         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7673                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7674                        GRPH_PFLIP_INT_CLEAR);
7675         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7676                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7677         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7678                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7679         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7680                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7681         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7682                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7683
7684         if (rdev->num_crtc >= 4) {
7685                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7686                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7687                                GRPH_PFLIP_INT_CLEAR);
7688                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7689                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7690                                GRPH_PFLIP_INT_CLEAR);
7691                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7692                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7693                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7694                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7695                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7696                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7697                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7698                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7699         }
7700
7701         if (rdev->num_crtc >= 6) {
7702                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7703                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7704                                GRPH_PFLIP_INT_CLEAR);
7705                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7706                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7707                                GRPH_PFLIP_INT_CLEAR);
7708                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7709                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7710                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7711                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7712                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7713                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7714                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7715                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7716         }
7717
7718         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7719                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7720                 tmp |= DC_HPDx_INT_ACK;
7721                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7722         }
7723         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7724                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7725                 tmp |= DC_HPDx_INT_ACK;
7726                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7727         }
7728         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7729                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7730                 tmp |= DC_HPDx_INT_ACK;
7731                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7732         }
7733         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7734                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7735                 tmp |= DC_HPDx_INT_ACK;
7736                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7737         }
7738         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7739                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7740                 tmp |= DC_HPDx_INT_ACK;
7741                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7742         }
7743         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7744                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7745                 tmp |= DC_HPDx_INT_ACK;
7746                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7747         }
7748         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7749                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7750                 tmp |= DC_HPDx_RX_INT_ACK;
7751                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7752         }
7753         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7754                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7755                 tmp |= DC_HPDx_RX_INT_ACK;
7756                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7757         }
7758         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7759                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7760                 tmp |= DC_HPDx_RX_INT_ACK;
7761                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7762         }
7763         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7764                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7765                 tmp |= DC_HPDx_RX_INT_ACK;
7766                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7767         }
7768         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7769                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7770                 tmp |= DC_HPDx_RX_INT_ACK;
7771                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7772         }
7773         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7774                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7775                 tmp |= DC_HPDx_RX_INT_ACK;
7776                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7777         }
7778 }
7779
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).  Turns interrupt delivery
 * off, then acknowledges anything that was already latched and
 * clears the per-source interrupt enable state, so no stale
 * interrupt fires after re-enable.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq: give any in-flight interrupt time
	 * to latch before acking, then drop the per-source enables.
	 */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7795
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* Full interrupt teardown first, then halt the RLC so the
	 * hw generates nothing while suspended.
	 */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7809
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* Quiesce the hw (disable irqs + stop RLC) before freeing the
	 * IH ring backing memory.
	 */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7824
7825 /**
7826  * cik_get_ih_wptr - get the IH ring buffer wptr
7827  *
7828  * @rdev: radeon_device pointer
7829  *
7830  * Get the IH ring buffer wptr from either the register
7831  * or the writeback memory buffer (CIK).  Also check for
7832  * ring buffer overflow and deal with it.
7833  * Used by cik_irq_process().
7834  * Returns the value of the wptr.
7835  */
7836 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7837 {
7838         u32 wptr, tmp;
7839
7840         if (rdev->wb.enabled)
7841                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7842         else
7843                 wptr = RREG32(IH_RB_WPTR);
7844
7845         if (wptr & RB_OVERFLOW) {
7846                 wptr &= ~RB_OVERFLOW;
7847                 /* When a ring buffer overflow happen start parsing interrupt
7848                  * from the last not overwritten vector (wptr + 16). Hopefully
7849                  * this should allow us to catchup.
7850                  */
7851                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7852                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7853                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7854                 tmp = RREG32(IH_RB_CNTL);
7855                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7856                 WREG32(IH_RB_CNTL, tmp);
7857         }
7858         return (wptr & rdev->ih.ptr_mask);
7859 }
7860
7861 /*        CIK IV Ring
7862  * Each IV ring entry is 128 bits:
7863  * [7:0]    - interrupt source id
7864  * [31:8]   - reserved
7865  * [59:32]  - interrupt source data
7866  * [63:60]  - reserved
7867  * [71:64]  - RINGID
7868  *            CP:
7869  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7870  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7871  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7872  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7873  *            PIPE_ID - ME0 0=3D
7874  *                    - ME1&2 compute dispatcher (4 pipes each)
7875  *            SDMA:
7876  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7877  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7878  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7879  * [79:72]  - VMID
7880  * [95:80]  - PASID
7881  * [127:96] - reserved
7882  */
7883 /**
7884  * cik_irq_process - interrupt handler
7885  *
7886  * @rdev: radeon_device pointer
7887  *
 * Interrupt handler (CIK).  Walk the IH ring,
7889  * ack interrupts and schedule work to handle
7890  * interrupt events.
7891  * Returns irq process return code.
7892  */
7893 int cik_irq_process(struct radeon_device *rdev)
7894 {
7895         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7896         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7897         u32 wptr;
7898         u32 rptr;
7899         u32 src_id, src_data, ring_id;
7900         u8 me_id, pipe_id, queue_id;
7901         u32 ring_index;
7902         bool queue_hotplug = false;
7903         bool queue_dp = false;
7904         bool queue_reset = false;
7905         u32 addr, status, mc_client;
7906         bool queue_thermal = false;
7907
7908         if (!rdev->ih.enabled || rdev->shutdown)
7909                 return IRQ_NONE;
7910
7911         wptr = cik_get_ih_wptr(rdev);
7912
7913 restart_ih:
7914         /* is somebody else already processing irqs? */
7915         if (atomic_xchg(&rdev->ih.lock, 1))
7916                 return IRQ_NONE;
7917
7918         rptr = rdev->ih.rptr;
7919         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7920
7921         /* Order reading of wptr vs. reading of IH ring data */
7922         rmb();
7923
7924         /* display interrupts */
7925         cik_irq_ack(rdev);
7926
7927         while (rptr != wptr) {
7928                 /* wptr/rptr are in bytes! */
7929                 ring_index = rptr / 4;
7930
7931                 radeon_kfd_interrupt(rdev,
7932                                 (const void *) &rdev->ih.ring[ring_index]);
7933
7934                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7935                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7936                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7937
7938                 switch (src_id) {
7939                 case 1: /* D1 vblank/vline */
7940                         switch (src_data) {
7941                         case 0: /* D1 vblank */
7942                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7943                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7944
7945                                 if (rdev->irq.crtc_vblank_int[0]) {
7946                                         drm_handle_vblank(rdev->ddev, 0);
7947                                         rdev->pm.vblank_sync = true;
7948                                         wake_up(&rdev->irq.vblank_queue);
7949                                 }
7950                                 if (atomic_read(&rdev->irq.pflip[0]))
7951                                         radeon_crtc_handle_vblank(rdev, 0);
7952                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7953                                 DRM_DEBUG("IH: D1 vblank\n");
7954
7955                                 break;
7956                         case 1: /* D1 vline */
7957                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7958                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7959
7960                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7961                                 DRM_DEBUG("IH: D1 vline\n");
7962
7963                                 break;
7964                         default:
7965                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7966                                 break;
7967                         }
7968                         break;
7969                 case 2: /* D2 vblank/vline */
7970                         switch (src_data) {
7971                         case 0: /* D2 vblank */
7972                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7973                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7974
7975                                 if (rdev->irq.crtc_vblank_int[1]) {
7976                                         drm_handle_vblank(rdev->ddev, 1);
7977                                         rdev->pm.vblank_sync = true;
7978                                         wake_up(&rdev->irq.vblank_queue);
7979                                 }
7980                                 if (atomic_read(&rdev->irq.pflip[1]))
7981                                         radeon_crtc_handle_vblank(rdev, 1);
7982                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7983                                 DRM_DEBUG("IH: D2 vblank\n");
7984
7985                                 break;
7986                         case 1: /* D2 vline */
7987                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7988                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7989
7990                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7991                                 DRM_DEBUG("IH: D2 vline\n");
7992
7993                                 break;
7994                         default:
7995                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7996                                 break;
7997                         }
7998                         break;
7999                 case 3: /* D3 vblank/vline */
8000                         switch (src_data) {
8001                         case 0: /* D3 vblank */
8002                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8003                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8004
8005                                 if (rdev->irq.crtc_vblank_int[2]) {
8006                                         drm_handle_vblank(rdev->ddev, 2);
8007                                         rdev->pm.vblank_sync = true;
8008                                         wake_up(&rdev->irq.vblank_queue);
8009                                 }
8010                                 if (atomic_read(&rdev->irq.pflip[2]))
8011                                         radeon_crtc_handle_vblank(rdev, 2);
8012                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8013                                 DRM_DEBUG("IH: D3 vblank\n");
8014
8015                                 break;
8016                         case 1: /* D3 vline */
8017                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8018                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8019
8020                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8021                                 DRM_DEBUG("IH: D3 vline\n");
8022
8023                                 break;
8024                         default:
8025                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8026                                 break;
8027                         }
8028                         break;
8029                 case 4: /* D4 vblank/vline */
8030                         switch (src_data) {
8031                         case 0: /* D4 vblank */
8032                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8033                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8034
8035                                 if (rdev->irq.crtc_vblank_int[3]) {
8036                                         drm_handle_vblank(rdev->ddev, 3);
8037                                         rdev->pm.vblank_sync = true;
8038                                         wake_up(&rdev->irq.vblank_queue);
8039                                 }
8040                                 if (atomic_read(&rdev->irq.pflip[3]))
8041                                         radeon_crtc_handle_vblank(rdev, 3);
8042                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8043                                 DRM_DEBUG("IH: D4 vblank\n");
8044
8045                                 break;
8046                         case 1: /* D4 vline */
8047                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8048                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8049
8050                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8051                                 DRM_DEBUG("IH: D4 vline\n");
8052
8053                                 break;
8054                         default:
8055                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8056                                 break;
8057                         }
8058                         break;
8059                 case 5: /* D5 vblank/vline */
8060                         switch (src_data) {
8061                         case 0: /* D5 vblank */
8062                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8063                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8064
8065                                 if (rdev->irq.crtc_vblank_int[4]) {
8066                                         drm_handle_vblank(rdev->ddev, 4);
8067                                         rdev->pm.vblank_sync = true;
8068                                         wake_up(&rdev->irq.vblank_queue);
8069                                 }
8070                                 if (atomic_read(&rdev->irq.pflip[4]))
8071                                         radeon_crtc_handle_vblank(rdev, 4);
8072                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8073                                 DRM_DEBUG("IH: D5 vblank\n");
8074
8075                                 break;
8076                         case 1: /* D5 vline */
8077                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8078                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8079
8080                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8081                                 DRM_DEBUG("IH: D5 vline\n");
8082
8083                                 break;
8084                         default:
8085                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8086                                 break;
8087                         }
8088                         break;
8089                 case 6: /* D6 vblank/vline */
8090                         switch (src_data) {
8091                         case 0: /* D6 vblank */
8092                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8093                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8094
8095                                 if (rdev->irq.crtc_vblank_int[5]) {
8096                                         drm_handle_vblank(rdev->ddev, 5);
8097                                         rdev->pm.vblank_sync = true;
8098                                         wake_up(&rdev->irq.vblank_queue);
8099                                 }
8100                                 if (atomic_read(&rdev->irq.pflip[5]))
8101                                         radeon_crtc_handle_vblank(rdev, 5);
8102                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8103                                 DRM_DEBUG("IH: D6 vblank\n");
8104
8105                                 break;
8106                         case 1: /* D6 vline */
8107                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8108                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8109
8110                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8111                                 DRM_DEBUG("IH: D6 vline\n");
8112
8113                                 break;
8114                         default:
8115                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8116                                 break;
8117                         }
8118                         break;
8119                 case 8: /* D1 page flip */
8120                 case 10: /* D2 page flip */
8121                 case 12: /* D3 page flip */
8122                 case 14: /* D4 page flip */
8123                 case 16: /* D5 page flip */
8124                 case 18: /* D6 page flip */
8125                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8126                         if (radeon_use_pflipirq > 0)
8127                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8128                         break;
8129                 case 42: /* HPD hotplug */
8130                         switch (src_data) {
8131                         case 0:
8132                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8133                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8134
8135                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8136                                 queue_hotplug = true;
8137                                 DRM_DEBUG("IH: HPD1\n");
8138
8139                                 break;
8140                         case 1:
8141                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8142                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8143
8144                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8145                                 queue_hotplug = true;
8146                                 DRM_DEBUG("IH: HPD2\n");
8147
8148                                 break;
8149                         case 2:
8150                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8151                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8152
8153                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8154                                 queue_hotplug = true;
8155                                 DRM_DEBUG("IH: HPD3\n");
8156
8157                                 break;
8158                         case 3:
8159                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8160                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8161
8162                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8163                                 queue_hotplug = true;
8164                                 DRM_DEBUG("IH: HPD4\n");
8165
8166                                 break;
8167                         case 4:
8168                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8169                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8170
8171                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8172                                 queue_hotplug = true;
8173                                 DRM_DEBUG("IH: HPD5\n");
8174
8175                                 break;
8176                         case 5:
8177                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8178                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8179
8180                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8181                                 queue_hotplug = true;
8182                                 DRM_DEBUG("IH: HPD6\n");
8183
8184                                 break;
8185                         case 6:
8186                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8187                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8188
8189                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8190                                 queue_dp = true;
8191                                 DRM_DEBUG("IH: HPD_RX 1\n");
8192
8193                                 break;
8194                         case 7:
8195                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8196                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8197
8198                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8199                                 queue_dp = true;
8200                                 DRM_DEBUG("IH: HPD_RX 2\n");
8201
8202                                 break;
8203                         case 8:
8204                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8205                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8206
8207                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8208                                 queue_dp = true;
8209                                 DRM_DEBUG("IH: HPD_RX 3\n");
8210
8211                                 break;
8212                         case 9:
8213                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8214                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8215
8216                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8217                                 queue_dp = true;
8218                                 DRM_DEBUG("IH: HPD_RX 4\n");
8219
8220                                 break;
8221                         case 10:
8222                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8223                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8224
8225                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8226                                 queue_dp = true;
8227                                 DRM_DEBUG("IH: HPD_RX 5\n");
8228
8229                                 break;
8230                         case 11:
8231                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8232                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8233
8234                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8235                                 queue_dp = true;
8236                                 DRM_DEBUG("IH: HPD_RX 6\n");
8237
8238                                 break;
8239                         default:
8240                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8241                                 break;
8242                         }
8243                         break;
8244                 case 96:
8245                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8246                         WREG32(SRBM_INT_ACK, 0x1);
8247                         break;
8248                 case 124: /* UVD */
8249                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8250                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8251                         break;
8252                 case 146:
8253                 case 147:
8254                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8255                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8256                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8257                         /* reset addr and status */
8258                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8259                         if (addr == 0x0 && status == 0x0)
8260                                 break;
8261                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8262                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8263                                 addr);
8264                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8265                                 status);
8266                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8267                         break;
8268                 case 167: /* VCE */
8269                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8270                         switch (src_data) {
8271                         case 0:
8272                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8273                                 break;
8274                         case 1:
8275                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8276                                 break;
8277                         default:
8278                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8279                                 break;
8280                         }
8281                         break;
8282                 case 176: /* GFX RB CP_INT */
8283                 case 177: /* GFX IB CP_INT */
8284                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8285                         break;
8286                 case 181: /* CP EOP event */
8287                         DRM_DEBUG("IH: CP EOP\n");
8288                         /* XXX check the bitfield order! */
8289                         me_id = (ring_id & 0x60) >> 5;
8290                         pipe_id = (ring_id & 0x18) >> 3;
8291                         queue_id = (ring_id & 0x7) >> 0;
8292                         switch (me_id) {
8293                         case 0:
8294                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8295                                 break;
8296                         case 1:
8297                         case 2:
8298                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8299                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8300                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8301                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8302                                 break;
8303                         }
8304                         break;
8305                 case 184: /* CP Privileged reg access */
8306                         DRM_ERROR("Illegal register access in command stream\n");
8307                         /* XXX check the bitfield order! */
8308                         me_id = (ring_id & 0x60) >> 5;
8309                         pipe_id = (ring_id & 0x18) >> 3;
8310                         queue_id = (ring_id & 0x7) >> 0;
8311                         switch (me_id) {
8312                         case 0:
8313                                 /* This results in a full GPU reset, but all we need to do is soft
8314                                  * reset the CP for gfx
8315                                  */
8316                                 queue_reset = true;
8317                                 break;
8318                         case 1:
8319                                 /* XXX compute */
8320                                 queue_reset = true;
8321                                 break;
8322                         case 2:
8323                                 /* XXX compute */
8324                                 queue_reset = true;
8325                                 break;
8326                         }
8327                         break;
8328                 case 185: /* CP Privileged inst */
8329                         DRM_ERROR("Illegal instruction in command stream\n");
8330                         /* XXX check the bitfield order! */
8331                         me_id = (ring_id & 0x60) >> 5;
8332                         pipe_id = (ring_id & 0x18) >> 3;
8333                         queue_id = (ring_id & 0x7) >> 0;
8334                         switch (me_id) {
8335                         case 0:
8336                                 /* This results in a full GPU reset, but all we need to do is soft
8337                                  * reset the CP for gfx
8338                                  */
8339                                 queue_reset = true;
8340                                 break;
8341                         case 1:
8342                                 /* XXX compute */
8343                                 queue_reset = true;
8344                                 break;
8345                         case 2:
8346                                 /* XXX compute */
8347                                 queue_reset = true;
8348                                 break;
8349                         }
8350                         break;
8351                 case 224: /* SDMA trap event */
8352                         /* XXX check the bitfield order! */
8353                         me_id = (ring_id & 0x3) >> 0;
8354                         queue_id = (ring_id & 0xc) >> 2;
8355                         DRM_DEBUG("IH: SDMA trap\n");
8356                         switch (me_id) {
8357                         case 0:
8358                                 switch (queue_id) {
8359                                 case 0:
8360                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8361                                         break;
8362                                 case 1:
8363                                         /* XXX compute */
8364                                         break;
8365                                 case 2:
8366                                         /* XXX compute */
8367                                         break;
8368                                 }
8369                                 break;
8370                         case 1:
8371                                 switch (queue_id) {
8372                                 case 0:
8373                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8374                                         break;
8375                                 case 1:
8376                                         /* XXX compute */
8377                                         break;
8378                                 case 2:
8379                                         /* XXX compute */
8380                                         break;
8381                                 }
8382                                 break;
8383                         }
8384                         break;
8385                 case 230: /* thermal low to high */
8386                         DRM_DEBUG("IH: thermal low to high\n");
8387                         rdev->pm.dpm.thermal.high_to_low = false;
8388                         queue_thermal = true;
8389                         break;
8390                 case 231: /* thermal high to low */
8391                         DRM_DEBUG("IH: thermal high to low\n");
8392                         rdev->pm.dpm.thermal.high_to_low = true;
8393                         queue_thermal = true;
8394                         break;
8395                 case 233: /* GUI IDLE */
8396                         DRM_DEBUG("IH: GUI idle\n");
8397                         break;
8398                 case 241: /* SDMA Privileged inst */
8399                 case 247: /* SDMA Privileged inst */
8400                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8401                         /* XXX check the bitfield order! */
8402                         me_id = (ring_id & 0x3) >> 0;
8403                         queue_id = (ring_id & 0xc) >> 2;
8404                         switch (me_id) {
8405                         case 0:
8406                                 switch (queue_id) {
8407                                 case 0:
8408                                         queue_reset = true;
8409                                         break;
8410                                 case 1:
8411                                         /* XXX compute */
8412                                         queue_reset = true;
8413                                         break;
8414                                 case 2:
8415                                         /* XXX compute */
8416                                         queue_reset = true;
8417                                         break;
8418                                 }
8419                                 break;
8420                         case 1:
8421                                 switch (queue_id) {
8422                                 case 0:
8423                                         queue_reset = true;
8424                                         break;
8425                                 case 1:
8426                                         /* XXX compute */
8427                                         queue_reset = true;
8428                                         break;
8429                                 case 2:
8430                                         /* XXX compute */
8431                                         queue_reset = true;
8432                                         break;
8433                                 }
8434                                 break;
8435                         }
8436                         break;
8437                 default:
8438                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8439                         break;
8440                 }
8441
8442                 /* wptr/rptr are in bytes! */
8443                 rptr += 16;
8444                 rptr &= rdev->ih.ptr_mask;
8445                 WREG32(IH_RB_RPTR, rptr);
8446         }
8447         if (queue_dp)
8448                 schedule_work(&rdev->dp_work);
8449         if (queue_hotplug)
8450                 schedule_work(&rdev->hotplug_work);
8451         if (queue_reset) {
8452                 rdev->needs_reset = true;
8453                 wake_up_all(&rdev->fence_queue);
8454         }
8455         if (queue_thermal)
8456                 schedule_work(&rdev->pm.dpm.thermal.work);
8457         rdev->ih.rptr = rptr;
8458         atomic_set(&rdev->ih.lock, 0);
8459
8460         /* make sure wptr hasn't changed while processing */
8461         wptr = cik_get_ih_wptr(rdev);
8462         if (wptr != rptr)
8463                 goto restart_ih;
8464
8465         return IRQ_HANDLED;
8466 }
8467
8468 /*
8469  * startup/shutdown callbacks
8470  */
8471 /**
8472  * cik_startup - program the asic to a functional state
8473  *
8474  * @rdev: radeon_device pointer
8475  *
8476  * Programs the asic to a functional state (CIK).
8477  * Called by cik_init() and cik_resume().
8478  * Returns 0 for success, error for failure.
8479  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC ucode manually unless DPM is already up
	 * (presumably DPM handles it in that case — NOTE(review): confirm) */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers; KAVERI uses the spectre register list,
	 * other IGPs the kalindi list */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring that must come up:
	 * GFX, both compute queues and both SDMA engines are mandatory */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure the ring size is zeroed so the
	 * ring-init block further down skips it instead of failing startup */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is optional too; both VCE rings are disabled on failure */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* HAWAII with old firmware must pad with type-2 packets; everything
	 * else uses type-3 NOPs */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring is only brought up if its resume above succeeded
	 * (ring_size is zero otherwise); failure here is non-fatal */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT means "no VCE ring was enabled" and is silently ignored
	 * in the error report below */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8725
8726 /**
8727  * cik_resume - resume the asic to a functional state
8728  *
8729  * @rdev: radeon_device pointer
8730  *
8731  * Programs the asic to a functional state (CIK).
8732  * Called at resume.
8733  * Returns 0 for success, error for failure.
8734  */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	/* resume DPM before cik_startup() so dpm_enabled is accurate when
	 * that function decides whether to load the MC ucode itself */
	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	/* accel_working is set optimistically and cleared again on failure */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
8759
8760 /**
8761  * cik_suspend - suspend the asic
8762  *
8763  * @rdev: radeon_device pointer
8764  *
8765  * Bring the chip into a state suitable for suspend (CIK).
8766  * Called at suspend.
8767  * Returns 0 for success.
8768  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop users of the hardware first (KFD, PM, audio, VM),
	 * then the engines, then IRQ/WB/GART plumbing — roughly the
	 * reverse of cik_startup() */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* quiesce the UVD/VCE blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before disabling interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8787
8788 /* Plan is to move initialization in that function and use
8789  * helper function so that radeon_device_init pretty much
8790  * do nothing more than calling asic specific function. This
8791  * should also allow to remove a bunch of callback function
8792  * like vram_info.
8793  */
8794 /**
8795  * cik_init - asic specific driver and hw init
8796  *
8797  * @rdev: radeon_device pointer
8798  *
8799  * Setup asic specific driver variables and program the hw
8800  * to a functional state (CIK).
8801  * Called at driver startup.
8802  * Returns 0 for success, errors for failure.
8803  */
8804 int cik_init(struct radeon_device *rdev)
8805 {
8806         struct radeon_ring *ring;
8807         int r;
8808
8809         /* Read BIOS */
8810         if (!radeon_get_bios(rdev)) {
8811                 if (ASIC_IS_AVIVO(rdev))
8812                         return -EINVAL;
8813         }
8814         /* Must be an ATOMBIOS */
8815         if (!rdev->is_atom_bios) {
8816                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8817                 return -EINVAL;
8818         }
8819         r = radeon_atombios_init(rdev);
8820         if (r)
8821                 return r;
8822
8823         /* Post card if necessary */
8824         if (!radeon_card_posted(rdev)) {
8825                 if (!rdev->bios) {
8826                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8827                         return -EINVAL;
8828                 }
8829                 DRM_INFO("GPU not posted. posting now...\n");
8830                 atom_asic_init(rdev->mode_info.atom_context);
8831         }
8832         /* init golden registers */
8833         cik_init_golden_registers(rdev);
8834         /* Initialize scratch registers */
8835         cik_scratch_init(rdev);
8836         /* Initialize surface registers */
8837         radeon_surface_init(rdev);
8838         /* Initialize clocks */
8839         radeon_get_clock_info(rdev->ddev);
8840
8841         /* Fence driver */
8842         r = radeon_fence_driver_init(rdev);
8843         if (r)
8844                 return r;
8845
8846         /* initialize memory controller */
8847         r = cik_mc_init(rdev);
8848         if (r)
8849                 return r;
8850         /* Memory manager */
8851         r = radeon_bo_init(rdev);
8852         if (r)
8853                 return r;
8854
8855         if (rdev->flags & RADEON_IS_IGP) {
8856                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8857                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8858                         r = cik_init_microcode(rdev);
8859                         if (r) {
8860                                 DRM_ERROR("Failed to load firmware!\n");
8861                                 return r;
8862                         }
8863                 }
8864         } else {
8865                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8866                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8867                     !rdev->mc_fw) {
8868                         r = cik_init_microcode(rdev);
8869                         if (r) {
8870                                 DRM_ERROR("Failed to load firmware!\n");
8871                                 return r;
8872                         }
8873                 }
8874         }
8875
8876         /* Initialize power management */
8877         radeon_pm_init(rdev);
8878
8879         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8880         ring->ring_obj = NULL;
8881         r600_ring_init(rdev, ring, 1024 * 1024);
8882
8883         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8884         ring->ring_obj = NULL;
8885         r600_ring_init(rdev, ring, 1024 * 1024);
8886         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8887         if (r)
8888                 return r;
8889
8890         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8891         ring->ring_obj = NULL;
8892         r600_ring_init(rdev, ring, 1024 * 1024);
8893         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8894         if (r)
8895                 return r;
8896
8897         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8898         ring->ring_obj = NULL;
8899         r600_ring_init(rdev, ring, 256 * 1024);
8900
8901         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8902         ring->ring_obj = NULL;
8903         r600_ring_init(rdev, ring, 256 * 1024);
8904
8905         r = radeon_uvd_init(rdev);
8906         if (!r) {
8907                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8908                 ring->ring_obj = NULL;
8909                 r600_ring_init(rdev, ring, 4096);
8910         }
8911
8912         r = radeon_vce_init(rdev);
8913         if (!r) {
8914                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8915                 ring->ring_obj = NULL;
8916                 r600_ring_init(rdev, ring, 4096);
8917
8918                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8919                 ring->ring_obj = NULL;
8920                 r600_ring_init(rdev, ring, 4096);
8921         }
8922
8923         rdev->ih.ring_obj = NULL;
8924         r600_ih_ring_init(rdev, 64 * 1024);
8925
8926         r = r600_pcie_gart_init(rdev);
8927         if (r)
8928                 return r;
8929
8930         rdev->accel_working = true;
8931         r = cik_startup(rdev);
8932         if (r) {
8933                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8934                 cik_cp_fini(rdev);
8935                 cik_sdma_fini(rdev);
8936                 cik_irq_fini(rdev);
8937                 sumo_rlc_fini(rdev);
8938                 cik_mec_fini(rdev);
8939                 radeon_wb_fini(rdev);
8940                 radeon_ib_pool_fini(rdev);
8941                 radeon_vm_manager_fini(rdev);
8942                 radeon_irq_kms_fini(rdev);
8943                 cik_pcie_gart_fini(rdev);
8944                 rdev->accel_working = false;
8945         }
8946
8947         /* Don't start up if the MC ucode is missing.
8948          * The default clocks and voltages before the MC ucode
8949          * is loaded are not suffient for advanced operations.
8950          */
8951         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8952                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8953                 return -EINVAL;
8954         }
8955
8956         return 0;
8957 }
8958
8959 /**
8960  * cik_fini - asic specific driver and hw fini
8961  *
8962  * @rdev: radeon_device pointer
8963  *
8964  * Tear down the asic specific driver variables and program the hw
8965  * to an idle state (CIK).
8966  * Called at driver unload.
8967  */
void cik_fini(struct radeon_device *rdev)
{
	/* PM first so nothing reclocks the hw while it is being torn down */
	radeon_pm_fini(rdev);
	/* stop and free the CP/SDMA engines and their support buffers */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* multimedia blocks */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* memory/gart/bios teardown last */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8994
8995 void dce8_program_fmt(struct drm_encoder *encoder)
8996 {
8997         struct drm_device *dev = encoder->dev;
8998         struct radeon_device *rdev = dev->dev_private;
8999         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9000         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9001         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9002         int bpc = 0;
9003         u32 tmp = 0;
9004         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9005
9006         if (connector) {
9007                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9008                 bpc = radeon_get_monitor_bpc(connector);
9009                 dither = radeon_connector->dither;
9010         }
9011
9012         /* LVDS/eDP FMT is set up by atom */
9013         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9014                 return;
9015
9016         /* not needed for analog */
9017         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9018             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9019                 return;
9020
9021         if (bpc == 0)
9022                 return;
9023
9024         switch (bpc) {
9025         case 6:
9026                 if (dither == RADEON_FMT_DITHER_ENABLE)
9027                         /* XXX sort out optimal dither settings */
9028                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9029                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9030                 else
9031                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9032                 break;
9033         case 8:
9034                 if (dither == RADEON_FMT_DITHER_ENABLE)
9035                         /* XXX sort out optimal dither settings */
9036                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9037                                 FMT_RGB_RANDOM_ENABLE |
9038                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9039                 else
9040                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9041                 break;
9042         case 10:
9043                 if (dither == RADEON_FMT_DITHER_ENABLE)
9044                         /* XXX sort out optimal dither settings */
9045                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9046                                 FMT_RGB_RANDOM_ENABLE |
9047                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9048                 else
9049                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9050                 break;
9051         default:
9052                 /* not needed */
9053                 break;
9054         }
9055
9056         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9057 }
9058
9059 /* display watermark setup */
9060 /**
9061  * dce8_line_buffer_adjust - Set up the line buffer
9062  *
9063  * @rdev: radeon_device pointer
9064  * @radeon_crtc: the selected display controller
9065  * @mode: the current display mode on the selected display
9066  * controller
9067  *
9068  * Setup up the line buffer allocation for
9069  * the selected display controller (CIK).
9070  * Returns the line buffer size in pixels.
9071  */
9072 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9073                                    struct radeon_crtc *radeon_crtc,
9074                                    struct drm_display_mode *mode)
9075 {
9076         u32 tmp, buffer_alloc, i;
9077         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9078         /*
9079          * Line Buffer Setup
9080          * There are 6 line buffers, one for each display controllers.
9081          * There are 3 partitions per LB. Select the number of partitions
9082          * to enable based on the display width.  For display widths larger
9083          * than 4096, you need use to use 2 display controllers and combine
9084          * them using the stereo blender.
9085          */
9086         if (radeon_crtc->base.enabled && mode) {
9087                 if (mode->crtc_hdisplay < 1920) {
9088                         tmp = 1;
9089                         buffer_alloc = 2;
9090                 } else if (mode->crtc_hdisplay < 2560) {
9091                         tmp = 2;
9092                         buffer_alloc = 2;
9093                 } else if (mode->crtc_hdisplay < 4096) {
9094                         tmp = 0;
9095                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9096                 } else {
9097                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9098                         tmp = 0;
9099                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9100                 }
9101         } else {
9102                 tmp = 1;
9103                 buffer_alloc = 0;
9104         }
9105
9106         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9107                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9108
9109         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9110                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9111         for (i = 0; i < rdev->usec_timeout; i++) {
9112                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9113                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9114                         break;
9115                 udelay(1);
9116         }
9117
9118         if (radeon_crtc->base.enabled && mode) {
9119                 switch (tmp) {
9120                 case 0:
9121                 default:
9122                         return 4096 * 2;
9123                 case 1:
9124                         return 1920 * 2;
9125                 case 2:
9126                         return 2560 * 2;
9127                 }
9128         }
9129
9130         /* controller not enabled, so no lb used */
9131         return 0;
9132 }
9133
9134 /**
9135  * cik_get_number_of_dram_channels - get the number of dram channels
9136  *
9137  * @rdev: radeon_device pointer
9138  *
9139  * Look up the number of video ram channels (CIK).
9140  * Used for display watermark bandwidth calculations
9141  * Returns the number of dram channels
9142  */
9143 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9144 {
9145         u32 tmp = RREG32(MC_SHARED_CHMAP);
9146
9147         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9148         case 0:
9149         default:
9150                 return 1;
9151         case 1:
9152                 return 2;
9153         case 2:
9154                 return 4;
9155         case 3:
9156                 return 8;
9157         case 4:
9158                 return 3;
9159         case 5:
9160                 return 6;
9161         case 6:
9162                 return 10;
9163         case 7:
9164                 return 12;
9165         case 8:
9166                 return 16;
9167         }
9168 }
9169
/* inputs to the DCE8 display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9185
9186 /**
9187  * dce8_dram_bandwidth - get the dram bandwidth
9188  *
9189  * @wm: watermark calculation data
9190  *
9191  * Calculate the raw dram bandwidth (CIK).
9192  * Used for display watermark bandwidth calculations
9193  * Returns the dram bandwidth in MBytes/s
9194  */
9195 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9196 {
9197         /* Calculate raw DRAM Bandwidth */
9198         fixed20_12 dram_efficiency; /* 0.7 */
9199         fixed20_12 yclk, dram_channels, bandwidth;
9200         fixed20_12 a;
9201
9202         a.full = dfixed_const(1000);
9203         yclk.full = dfixed_const(wm->yclk);
9204         yclk.full = dfixed_div(yclk, a);
9205         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9206         a.full = dfixed_const(10);
9207         dram_efficiency.full = dfixed_const(7);
9208         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9209         bandwidth.full = dfixed_mul(dram_channels, yclk);
9210         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9211
9212         return dfixed_trunc(bandwidth);
9213 }
9214
9215 /**
9216  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9217  *
9218  * @wm: watermark calculation data
9219  *
9220  * Calculate the dram bandwidth used for display (CIK).
9221  * Used for display watermark bandwidth calculations
9222  * Returns the dram bandwidth for display in MBytes/s
9223  */
9224 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9225 {
9226         /* Calculate DRAM Bandwidth and the part allocated to display. */
9227         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9228         fixed20_12 yclk, dram_channels, bandwidth;
9229         fixed20_12 a;
9230
9231         a.full = dfixed_const(1000);
9232         yclk.full = dfixed_const(wm->yclk);
9233         yclk.full = dfixed_div(yclk, a);
9234         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9235         a.full = dfixed_const(10);
9236         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9237         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9238         bandwidth.full = dfixed_mul(dram_channels, yclk);
9239         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9240
9241         return dfixed_trunc(bandwidth);
9242 }
9243
9244 /**
9245  * dce8_data_return_bandwidth - get the data return bandwidth
9246  *
9247  * @wm: watermark calculation data
9248  *
9249  * Calculate the data return bandwidth used for display (CIK).
9250  * Used for display watermark bandwidth calculations
9251  * Returns the data return bandwidth in MBytes/s
9252  */
9253 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9254 {
9255         /* Calculate the display Data return Bandwidth */
9256         fixed20_12 return_efficiency; /* 0.8 */
9257         fixed20_12 sclk, bandwidth;
9258         fixed20_12 a;
9259
9260         a.full = dfixed_const(1000);
9261         sclk.full = dfixed_const(wm->sclk);
9262         sclk.full = dfixed_div(sclk, a);
9263         a.full = dfixed_const(10);
9264         return_efficiency.full = dfixed_const(8);
9265         return_efficiency.full = dfixed_div(return_efficiency, a);
9266         a.full = dfixed_const(32);
9267         bandwidth.full = dfixed_mul(a, sclk);
9268         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9269
9270         return dfixed_trunc(bandwidth);
9271 }
9272
9273 /**
9274  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9275  *
9276  * @wm: watermark calculation data
9277  *
9278  * Calculate the dmif bandwidth used for display (CIK).
9279  * Used for display watermark bandwidth calculations
9280  * Returns the dmif bandwidth in MBytes/s
9281  */
9282 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9283 {
9284         /* Calculate the DMIF Request Bandwidth */
9285         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9286         fixed20_12 disp_clk, bandwidth;
9287         fixed20_12 a, b;
9288
9289         a.full = dfixed_const(1000);
9290         disp_clk.full = dfixed_const(wm->disp_clk);
9291         disp_clk.full = dfixed_div(disp_clk, a);
9292         a.full = dfixed_const(32);
9293         b.full = dfixed_mul(a, disp_clk);
9294
9295         a.full = dfixed_const(10);
9296         disp_clk_request_efficiency.full = dfixed_const(8);
9297         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9298
9299         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9300
9301         return dfixed_trunc(bandwidth);
9302 }
9303
9304 /**
9305  * dce8_available_bandwidth - get the min available bandwidth
9306  *
9307  * @wm: watermark calculation data
9308  *
9309  * Calculate the min available bandwidth used for display (CIK).
9310  * Used for display watermark bandwidth calculations
9311  * Returns the min available bandwidth in MBytes/s
9312  */
9313 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9314 {
9315         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9316         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9317         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9318         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9319
9320         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9321 }
9322
9323 /**
9324  * dce8_average_bandwidth - get the average available bandwidth
9325  *
9326  * @wm: watermark calculation data
9327  *
9328  * Calculate the average available bandwidth used for display (CIK).
9329  * Used for display watermark bandwidth calculations
9330  * Returns the average available bandwidth in MBytes/s
9331  */
9332 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9333 {
9334         /* Calculate the display mode Average Bandwidth
9335          * DisplayMode should contain the source and destination dimensions,
9336          * timing, etc.
9337          */
9338         fixed20_12 bpp;
9339         fixed20_12 line_time;
9340         fixed20_12 src_width;
9341         fixed20_12 bandwidth;
9342         fixed20_12 a;
9343
9344         a.full = dfixed_const(1000);
9345         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9346         line_time.full = dfixed_div(line_time, a);
9347         bpp.full = dfixed_const(wm->bytes_per_pixel);
9348         src_width.full = dfixed_const(wm->src_width);
9349         bandwidth.full = dfixed_mul(src_width, bpp);
9350         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9351         bandwidth.full = dfixed_div(bandwidth, line_time);
9352
9353         return dfixed_trunc(bandwidth);
9354 }
9355
9356 /**
9357  * dce8_latency_watermark - get the latency watermark
9358  *
9359  * @wm: watermark calculation data
9360  *
9361  * Calculate the latency watermark (CIK).
9362  * Used for display watermark bandwidth calculations
9363  * Returns the latency watermark in ns
9364  */
9365 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9366 {
9367         /* First calculate the latency in ns */
9368         u32 mc_latency = 2000; /* 2000 ns. */
9369         u32 available_bandwidth = dce8_available_bandwidth(wm);
9370         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9371         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9372         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9373         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9374                 (wm->num_heads * cursor_line_pair_return_time);
9375         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9376         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9377         u32 tmp, dmif_size = 12288;
9378         fixed20_12 a, b, c;
9379
9380         if (wm->num_heads == 0)
9381                 return 0;
9382
9383         a.full = dfixed_const(2);
9384         b.full = dfixed_const(1);
9385         if ((wm->vsc.full > a.full) ||
9386             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9387             (wm->vtaps >= 5) ||
9388             ((wm->vsc.full >= a.full) && wm->interlaced))
9389                 max_src_lines_per_dst_line = 4;
9390         else
9391                 max_src_lines_per_dst_line = 2;
9392
9393         a.full = dfixed_const(available_bandwidth);
9394         b.full = dfixed_const(wm->num_heads);
9395         a.full = dfixed_div(a, b);
9396
9397         b.full = dfixed_const(mc_latency + 512);
9398         c.full = dfixed_const(wm->disp_clk);
9399         b.full = dfixed_div(b, c);
9400
9401         c.full = dfixed_const(dmif_size);
9402         b.full = dfixed_div(c, b);
9403
9404         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9405
9406         b.full = dfixed_const(1000);
9407         c.full = dfixed_const(wm->disp_clk);
9408         b.full = dfixed_div(c, b);
9409         c.full = dfixed_const(wm->bytes_per_pixel);
9410         b.full = dfixed_mul(b, c);
9411
9412         lb_fill_bw = min(tmp, dfixed_trunc(b));
9413
9414         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9415         b.full = dfixed_const(1000);
9416         c.full = dfixed_const(lb_fill_bw);
9417         b.full = dfixed_div(c, b);
9418         a.full = dfixed_div(a, b);
9419         line_fill_time = dfixed_trunc(a);
9420
9421         if (line_fill_time < wm->active_time)
9422                 return latency;
9423         else
9424                 return latency + (line_fill_time - wm->active_time);
9425
9426 }
9427
9428 /**
9429  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9430  * average and available dram bandwidth
9431  *
9432  * @wm: watermark calculation data
9433  *
9434  * Check if the display average bandwidth fits in the display
9435  * dram bandwidth (CIK).
9436  * Used for display watermark bandwidth calculations
9437  * Returns true if the display fits, false if not.
9438  */
9439 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9440 {
9441         if (dce8_average_bandwidth(wm) <=
9442             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9443                 return true;
9444         else
9445                 return false;
9446 }
9447
9448 /**
9449  * dce8_average_bandwidth_vs_available_bandwidth - check
9450  * average and available bandwidth
9451  *
9452  * @wm: watermark calculation data
9453  *
9454  * Check if the display average bandwidth fits in the display
9455  * available bandwidth (CIK).
9456  * Used for display watermark bandwidth calculations
9457  * Returns true if the display fits, false if not.
9458  */
9459 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9460 {
9461         if (dce8_average_bandwidth(wm) <=
9462             (dce8_available_bandwidth(wm) / wm->num_heads))
9463                 return true;
9464         else
9465                 return false;
9466 }
9467
9468 /**
9469  * dce8_check_latency_hiding - check latency hiding
9470  *
9471  * @wm: watermark calculation data
9472  *
9473  * Check latency hiding (CIK).
9474  * Used for display watermark bandwidth calculations
9475  * Returns true if the display fits, false if not.
9476  */
9477 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9478 {
9479         u32 lb_partitions = wm->lb_size / wm->src_width;
9480         u32 line_time = wm->active_time + wm->blank_time;
9481         u32 latency_tolerant_lines;
9482         u32 latency_hiding;
9483         fixed20_12 a;
9484
9485         a.full = dfixed_const(1);
9486         if (wm->vsc.full > a.full)
9487                 latency_tolerant_lines = 1;
9488         else {
9489                 if (lb_partitions <= (wm->vtaps + 1))
9490                         latency_tolerant_lines = 1;
9491                 else
9492                         latency_tolerant_lines = 2;
9493         }
9494
9495         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9496
9497         if (dce8_latency_watermark(wm) <= latency_hiding)
9498                 return true;
9499         else
9500                 return false;
9501 }
9502
9503 /**
9504  * dce8_program_watermarks - program display watermarks
9505  *
9506  * @rdev: radeon_device pointer
9507  * @radeon_crtc: the selected display controller
9508  * @lb_size: line buffer size
9509  * @num_heads: number of display controllers in use
9510  *
9511  * Calculate and program the display watermarks for the
9512  * selected display controller (CIK).
9513  */
9514 static void dce8_program_watermarks(struct radeon_device *rdev,
9515                                     struct radeon_crtc *radeon_crtc,
9516                                     u32 lb_size, u32 num_heads)
9517 {
9518         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9519         struct dce8_wm_params wm_low, wm_high;
9520         u32 pixel_period;
9521         u32 line_time = 0;
9522         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9523         u32 tmp, wm_mask;
9524
9525         if (radeon_crtc->base.enabled && num_heads && mode) {
9526                 pixel_period = 1000000 / (u32)mode->clock;
9527                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9528
9529                 /* watermark for high clocks */
9530                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9531                     rdev->pm.dpm_enabled) {
9532                         wm_high.yclk =
9533                                 radeon_dpm_get_mclk(rdev, false) * 10;
9534                         wm_high.sclk =
9535                                 radeon_dpm_get_sclk(rdev, false) * 10;
9536                 } else {
9537                         wm_high.yclk = rdev->pm.current_mclk * 10;
9538                         wm_high.sclk = rdev->pm.current_sclk * 10;
9539                 }
9540
9541                 wm_high.disp_clk = mode->clock;
9542                 wm_high.src_width = mode->crtc_hdisplay;
9543                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9544                 wm_high.blank_time = line_time - wm_high.active_time;
9545                 wm_high.interlaced = false;
9546                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9547                         wm_high.interlaced = true;
9548                 wm_high.vsc = radeon_crtc->vsc;
9549                 wm_high.vtaps = 1;
9550                 if (radeon_crtc->rmx_type != RMX_OFF)
9551                         wm_high.vtaps = 2;
9552                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9553                 wm_high.lb_size = lb_size;
9554                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9555                 wm_high.num_heads = num_heads;
9556
9557                 /* set for high clocks */
9558                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9559
9560                 /* possibly force display priority to high */
9561                 /* should really do this at mode validation time... */
9562                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9563                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9564                     !dce8_check_latency_hiding(&wm_high) ||
9565                     (rdev->disp_priority == 2)) {
9566                         DRM_DEBUG_KMS("force priority to high\n");
9567                 }
9568
9569                 /* watermark for low clocks */
9570                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9571                     rdev->pm.dpm_enabled) {
9572                         wm_low.yclk =
9573                                 radeon_dpm_get_mclk(rdev, true) * 10;
9574                         wm_low.sclk =
9575                                 radeon_dpm_get_sclk(rdev, true) * 10;
9576                 } else {
9577                         wm_low.yclk = rdev->pm.current_mclk * 10;
9578                         wm_low.sclk = rdev->pm.current_sclk * 10;
9579                 }
9580
9581                 wm_low.disp_clk = mode->clock;
9582                 wm_low.src_width = mode->crtc_hdisplay;
9583                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9584                 wm_low.blank_time = line_time - wm_low.active_time;
9585                 wm_low.interlaced = false;
9586                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9587                         wm_low.interlaced = true;
9588                 wm_low.vsc = radeon_crtc->vsc;
9589                 wm_low.vtaps = 1;
9590                 if (radeon_crtc->rmx_type != RMX_OFF)
9591                         wm_low.vtaps = 2;
9592                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9593                 wm_low.lb_size = lb_size;
9594                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9595                 wm_low.num_heads = num_heads;
9596
9597                 /* set for low clocks */
9598                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9599
9600                 /* possibly force display priority to high */
9601                 /* should really do this at mode validation time... */
9602                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9603                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9604                     !dce8_check_latency_hiding(&wm_low) ||
9605                     (rdev->disp_priority == 2)) {
9606                         DRM_DEBUG_KMS("force priority to high\n");
9607                 }
9608         }
9609
9610         /* select wm A */
9611         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9612         tmp = wm_mask;
9613         tmp &= ~LATENCY_WATERMARK_MASK(3);
9614         tmp |= LATENCY_WATERMARK_MASK(1);
9615         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9616         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9617                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9618                 LATENCY_HIGH_WATERMARK(line_time)));
9619         /* select wm B */
9620         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9621         tmp &= ~LATENCY_WATERMARK_MASK(3);
9622         tmp |= LATENCY_WATERMARK_MASK(2);
9623         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9624         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9625                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9626                 LATENCY_HIGH_WATERMARK(line_time)));
9627         /* restore original selection */
9628         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9629
9630         /* save values for DPM */
9631         radeon_crtc->line_time = line_time;
9632         radeon_crtc->wm_high = latency_watermark_a;
9633         radeon_crtc->wm_low = latency_watermark_b;
9634 }
9635
9636 /**
9637  * dce8_bandwidth_update - program display watermarks
9638  *
9639  * @rdev: radeon_device pointer
9640  *
9641  * Calculate and program the display watermarks and line
9642  * buffer allocation (CIK).
9643  */
9644 void dce8_bandwidth_update(struct radeon_device *rdev)
9645 {
9646         struct drm_display_mode *mode = NULL;
9647         u32 num_heads = 0, lb_size;
9648         int i;
9649
9650         if (!rdev->mode_info.mode_config_initialized)
9651                 return;
9652
9653         radeon_update_display_priority(rdev);
9654
9655         for (i = 0; i < rdev->num_crtc; i++) {
9656                 if (rdev->mode_info.crtcs[i]->base.enabled)
9657                         num_heads++;
9658         }
9659         for (i = 0; i < rdev->num_crtc; i++) {
9660                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9661                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9662                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9663         }
9664 }
9665
9666 /**
9667  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9668  *
9669  * @rdev: radeon_device pointer
9670  *
9671  * Fetches a GPU clock counter snapshot (SI).
9672  * Returns the 64 bit clock counter snapshot.
9673  */
9674 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9675 {
9676         uint64_t clock;
9677
9678         mutex_lock(&rdev->gpu_clock_mutex);
9679         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9680         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9681                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9682         mutex_unlock(&rdev->gpu_clock_mutex);
9683         return clock;
9684 }
9685
9686 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9687                               u32 cntl_reg, u32 status_reg)
9688 {
9689         int r, i;
9690         struct atom_clock_dividers dividers;
9691         uint32_t tmp;
9692
9693         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9694                                            clock, false, &dividers);
9695         if (r)
9696                 return r;
9697
9698         tmp = RREG32_SMC(cntl_reg);
9699         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9700         tmp |= dividers.post_divider;
9701         WREG32_SMC(cntl_reg, tmp);
9702
9703         for (i = 0; i < 100; i++) {
9704                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9705                         break;
9706                 mdelay(10);
9707         }
9708         if (i == 100)
9709                 return -ETIMEDOUT;
9710
9711         return 0;
9712 }
9713
9714 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9715 {
9716         int r = 0;
9717
9718         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9719         if (r)
9720                 return r;
9721
9722         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9723         return r;
9724 }
9725
9726 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9727 {
9728         int r, i;
9729         struct atom_clock_dividers dividers;
9730         u32 tmp;
9731
9732         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9733                                            ecclk, false, &dividers);
9734         if (r)
9735                 return r;
9736
9737         for (i = 0; i < 100; i++) {
9738                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9739                         break;
9740                 mdelay(10);
9741         }
9742         if (i == 100)
9743                 return -ETIMEDOUT;
9744
9745         tmp = RREG32_SMC(CG_ECLK_CNTL);
9746         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9747         tmp |= dividers.post_divider;
9748         WREG32_SMC(CG_ECLK_CNTL, tmp);
9749
9750         for (i = 0; i < 100; i++) {
9751                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9752                         break;
9753                 mdelay(10);
9754         }
9755         if (i == 100)
9756                 return -ETIMEDOUT;
9757
9758         return 0;
9759 }
9760
/* Attempt to raise the PCIe link to gen2/gen3 speeds.
 * Bails out early for root-bus devices, IGPs, non-PCIE parts, when the
 * user disabled it (radeon.pcie_gen2=0), or when the platform does not
 * advertise gen2/gen3 capability. For gen3 the hardware equalization
 * sequence is retried via the bridge/GPU link control registers.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* module parameter gates both gen2 and gen3 */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do unless the platform supports 5.0 or 8.0 GT/s */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both the upstream bridge and the GPU must expose a PCIe cap */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the link control settings so they can be
			 * restored after the retrain attempts */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the widest detected link width
			 * before changing speed */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then kick off a redo of
				 * the equalization procedure */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved enter-compliance
				 * and transmit-margin fields */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* trigger the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9920
/*
 * cik_program_aspm - configure PCIe ASPM (Active State Power Management)
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe port and SMC registers to enable L0s/L1 link power
 * states, PLL power-down in L1, and CLKREQ#-based clocking where the
 * platform supports it.  All writes are read-modify-write and are only
 * issued when the value actually changes, to avoid redundant bus cycles.
 *
 * NOTE(review): the disable_* knobs below are hard-coded false here, so
 * the "disabled" branches are currently dead; this mirrors the per-ASIC
 * pattern used by other radeon ASICs where some chips flip these flags.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* bail if ASPM was globally disabled (radeon_aspm module option) */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	/* ASPM only applies to PCIe parts */
	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences (N_FTS) transmitted
	 * when exiting L0s */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	/* allow the link controller to enter recovery */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* ignore EDB (end bad) errors on the port */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program the L0s/L1 inactivity timers; start with both cleared and
	 * PMI-to-L1 transitions disabled, then enable per the flags below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and re-allow PMI-to-L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power off while in L1/TXS2
			 * (both lane bundles, both PLL pairs) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* enable dynamic lane power state management */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* check whether the upstream bridge advertises clock
			 * power management (CLKREQ#) in its link capabilities;
			 * a root-bus device has no upstream bridge to query */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow power-down in L1/L2-L3 */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor clocks off the
				 * reference clock so it can be gated */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				/* likewise for the deep-sleep and Z clocks */
				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				/* don't source XCLK from BCLK */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				/* stop forcing the BIF reference clock on */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				/* select the MPLL bypass clock output */
				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the LC_CNTL value prepared above
		 * (L1 timer cleared, PMI-to-L1 still disabled) */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the PCIe memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the negotiated N_FTS is at maximum and the link trained
		 * with reversed lanes in both directions, back out the L0s
		 * inactivity timer set earlier */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}