kernel/drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
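/*
 * Each ASIC's firmware is listed under both the legacy upper-case names and
 * the newer lower-case names; the microcode loader is expected to try the
 * new naming scheme first and fall back to the legacy one.
 */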
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
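/*
 * Helpers shared with the r600/evergreen/si code, plus forward declarations
 * for local CIK routines used before their definitions.
 */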
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142                                           bool enable);
143
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155                                   u32 reg, u32 *val)
156 {
157         switch (reg) {
158         case GRBM_STATUS:
159         case GRBM_STATUS2:
160         case GRBM_STATUS_SE0:
161         case GRBM_STATUS_SE1:
162         case GRBM_STATUS_SE2:
163         case GRBM_STATUS_SE3:
164         case SRBM_STATUS:
165         case SRBM_STATUS2:
166         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168         case UVD_STATUS:
169         /* TODO VCE */
170                 *val = RREG32(reg);
171                 return 0;
172         default:
173                 return -EINVAL;
174         }
175 }
176
177 /* get temperature in millidegrees */
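/*
 * CTF_TEMP is a 9-bit field; readings with bit 0x200 set are clamped to
 * 255 C.  Example of the conversion below: raw 0x4b -> 75 C -> 75000.
 */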
178 int ci_get_temp(struct radeon_device *rdev)
179 {
180         u32 temp;
181         int actual_temp = 0;
182
183         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
184                 CTF_TEMP_SHIFT;
185
186         if (temp & 0x200)
187                 actual_temp = 255;
188         else
189                 actual_temp = temp & 0x1ff;
190
191         actual_temp = actual_temp * 1000;
192
193         return actual_temp;
194 }
195
196 /* get temperature in millidegrees */
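/*
 * The SMC reports eighth-degree units with a 49 C offset, so a raw reading
 * of 968 works out to (968 / 8) - 49 = 72 C, returned as 72000 millidegrees.
 */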
197 int kv_get_temp(struct radeon_device *rdev)
198 {
199         u32 temp;
200         int actual_temp = 0;
201
202         temp = RREG32_SMC(0xC0300E0C);
203
204         if (temp)
205                 actual_temp = (temp / 8) - 49;
206         else
207                 actual_temp = 0;
208
209         actual_temp = actual_temp * 1000;
210
211         return actual_temp;
212 }
213
214 /*
215  * Indirect registers accessor
216  */
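/*
 * Standard index/data pattern: write the register offset to PCIE_INDEX,
 * read it back to post the write, then transfer the payload through
 * PCIE_DATA, all under pciep_idx_lock.
 */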
217 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
218 {
219         unsigned long flags;
220         u32 r;
221
222         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
223         WREG32(PCIE_INDEX, reg);
224         (void)RREG32(PCIE_INDEX);
225         r = RREG32(PCIE_DATA);
226         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
227         return r;
228 }
229
230 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
231 {
232         unsigned long flags;
233
234         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
235         WREG32(PCIE_INDEX, reg);
236         (void)RREG32(PCIE_INDEX);
237         WREG32(PCIE_DATA, v);
238         (void)RREG32(PCIE_DATA);
239         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
240 }
241
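/*
 * RLC save/restore register lists.  Each entry appears to pair an encoded
 * selector (a GRBM_GFX_INDEX-style value in the upper 16 bits, the dword
 * register offset in the lower 16 bits) with a slot for the saved value;
 * this is a reading of the table layout, not taken from hardware docs.
 */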
242 static const u32 spectre_rlc_save_restore_register_list[] =
243 {
244         (0x0e00 << 16) | (0xc12c >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc140 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0xc150 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0xc15c >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0xc168 >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0xc170 >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0xc178 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xc204 >> 2),
259         0x00000000,
260         (0x0e00 << 16) | (0xc2b4 >> 2),
261         0x00000000,
262         (0x0e00 << 16) | (0xc2b8 >> 2),
263         0x00000000,
264         (0x0e00 << 16) | (0xc2bc >> 2),
265         0x00000000,
266         (0x0e00 << 16) | (0xc2c0 >> 2),
267         0x00000000,
268         (0x0e00 << 16) | (0x8228 >> 2),
269         0x00000000,
270         (0x0e00 << 16) | (0x829c >> 2),
271         0x00000000,
272         (0x0e00 << 16) | (0x869c >> 2),
273         0x00000000,
274         (0x0600 << 16) | (0x98f4 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x98f8 >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x9900 >> 2),
279         0x00000000,
280         (0x0e00 << 16) | (0xc260 >> 2),
281         0x00000000,
282         (0x0e00 << 16) | (0x90e8 >> 2),
283         0x00000000,
284         (0x0e00 << 16) | (0x3c000 >> 2),
285         0x00000000,
286         (0x0e00 << 16) | (0x3c00c >> 2),
287         0x00000000,
288         (0x0e00 << 16) | (0x8c1c >> 2),
289         0x00000000,
290         (0x0e00 << 16) | (0x9700 >> 2),
291         0x00000000,
292         (0x0e00 << 16) | (0xcd20 >> 2),
293         0x00000000,
294         (0x4e00 << 16) | (0xcd20 >> 2),
295         0x00000000,
296         (0x5e00 << 16) | (0xcd20 >> 2),
297         0x00000000,
298         (0x6e00 << 16) | (0xcd20 >> 2),
299         0x00000000,
300         (0x7e00 << 16) | (0xcd20 >> 2),
301         0x00000000,
302         (0x8e00 << 16) | (0xcd20 >> 2),
303         0x00000000,
304         (0x9e00 << 16) | (0xcd20 >> 2),
305         0x00000000,
306         (0xae00 << 16) | (0xcd20 >> 2),
307         0x00000000,
308         (0xbe00 << 16) | (0xcd20 >> 2),
309         0x00000000,
310         (0x0e00 << 16) | (0x89bc >> 2),
311         0x00000000,
312         (0x0e00 << 16) | (0x8900 >> 2),
313         0x00000000,
314         0x3,
315         (0x0e00 << 16) | (0xc130 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xc134 >> 2),
318         0x00000000,
319         (0x0e00 << 16) | (0xc1fc >> 2),
320         0x00000000,
321         (0x0e00 << 16) | (0xc208 >> 2),
322         0x00000000,
323         (0x0e00 << 16) | (0xc264 >> 2),
324         0x00000000,
325         (0x0e00 << 16) | (0xc268 >> 2),
326         0x00000000,
327         (0x0e00 << 16) | (0xc26c >> 2),
328         0x00000000,
329         (0x0e00 << 16) | (0xc270 >> 2),
330         0x00000000,
331         (0x0e00 << 16) | (0xc274 >> 2),
332         0x00000000,
333         (0x0e00 << 16) | (0xc278 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0xc27c >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0xc280 >> 2),
338         0x00000000,
339         (0x0e00 << 16) | (0xc284 >> 2),
340         0x00000000,
341         (0x0e00 << 16) | (0xc288 >> 2),
342         0x00000000,
343         (0x0e00 << 16) | (0xc28c >> 2),
344         0x00000000,
345         (0x0e00 << 16) | (0xc290 >> 2),
346         0x00000000,
347         (0x0e00 << 16) | (0xc294 >> 2),
348         0x00000000,
349         (0x0e00 << 16) | (0xc298 >> 2),
350         0x00000000,
351         (0x0e00 << 16) | (0xc29c >> 2),
352         0x00000000,
353         (0x0e00 << 16) | (0xc2a0 >> 2),
354         0x00000000,
355         (0x0e00 << 16) | (0xc2a4 >> 2),
356         0x00000000,
357         (0x0e00 << 16) | (0xc2a8 >> 2),
358         0x00000000,
359         (0x0e00 << 16) | (0xc2ac  >> 2),
360         0x00000000,
361         (0x0e00 << 16) | (0xc2b0 >> 2),
362         0x00000000,
363         (0x0e00 << 16) | (0x301d0 >> 2),
364         0x00000000,
365         (0x0e00 << 16) | (0x30238 >> 2),
366         0x00000000,
367         (0x0e00 << 16) | (0x30250 >> 2),
368         0x00000000,
369         (0x0e00 << 16) | (0x30254 >> 2),
370         0x00000000,
371         (0x0e00 << 16) | (0x30258 >> 2),
372         0x00000000,
373         (0x0e00 << 16) | (0x3025c >> 2),
374         0x00000000,
375         (0x4e00 << 16) | (0xc900 >> 2),
376         0x00000000,
377         (0x5e00 << 16) | (0xc900 >> 2),
378         0x00000000,
379         (0x6e00 << 16) | (0xc900 >> 2),
380         0x00000000,
381         (0x7e00 << 16) | (0xc900 >> 2),
382         0x00000000,
383         (0x8e00 << 16) | (0xc900 >> 2),
384         0x00000000,
385         (0x9e00 << 16) | (0xc900 >> 2),
386         0x00000000,
387         (0xae00 << 16) | (0xc900 >> 2),
388         0x00000000,
389         (0xbe00 << 16) | (0xc900 >> 2),
390         0x00000000,
391         (0x4e00 << 16) | (0xc904 >> 2),
392         0x00000000,
393         (0x5e00 << 16) | (0xc904 >> 2),
394         0x00000000,
395         (0x6e00 << 16) | (0xc904 >> 2),
396         0x00000000,
397         (0x7e00 << 16) | (0xc904 >> 2),
398         0x00000000,
399         (0x8e00 << 16) | (0xc904 >> 2),
400         0x00000000,
401         (0x9e00 << 16) | (0xc904 >> 2),
402         0x00000000,
403         (0xae00 << 16) | (0xc904 >> 2),
404         0x00000000,
405         (0xbe00 << 16) | (0xc904 >> 2),
406         0x00000000,
407         (0x4e00 << 16) | (0xc908 >> 2),
408         0x00000000,
409         (0x5e00 << 16) | (0xc908 >> 2),
410         0x00000000,
411         (0x6e00 << 16) | (0xc908 >> 2),
412         0x00000000,
413         (0x7e00 << 16) | (0xc908 >> 2),
414         0x00000000,
415         (0x8e00 << 16) | (0xc908 >> 2),
416         0x00000000,
417         (0x9e00 << 16) | (0xc908 >> 2),
418         0x00000000,
419         (0xae00 << 16) | (0xc908 >> 2),
420         0x00000000,
421         (0xbe00 << 16) | (0xc908 >> 2),
422         0x00000000,
423         (0x4e00 << 16) | (0xc90c >> 2),
424         0x00000000,
425         (0x5e00 << 16) | (0xc90c >> 2),
426         0x00000000,
427         (0x6e00 << 16) | (0xc90c >> 2),
428         0x00000000,
429         (0x7e00 << 16) | (0xc90c >> 2),
430         0x00000000,
431         (0x8e00 << 16) | (0xc90c >> 2),
432         0x00000000,
433         (0x9e00 << 16) | (0xc90c >> 2),
434         0x00000000,
435         (0xae00 << 16) | (0xc90c >> 2),
436         0x00000000,
437         (0xbe00 << 16) | (0xc90c >> 2),
438         0x00000000,
439         (0x4e00 << 16) | (0xc910 >> 2),
440         0x00000000,
441         (0x5e00 << 16) | (0xc910 >> 2),
442         0x00000000,
443         (0x6e00 << 16) | (0xc910 >> 2),
444         0x00000000,
445         (0x7e00 << 16) | (0xc910 >> 2),
446         0x00000000,
447         (0x8e00 << 16) | (0xc910 >> 2),
448         0x00000000,
449         (0x9e00 << 16) | (0xc910 >> 2),
450         0x00000000,
451         (0xae00 << 16) | (0xc910 >> 2),
452         0x00000000,
453         (0xbe00 << 16) | (0xc910 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0xc99c >> 2),
456         0x00000000,
457         (0x0e00 << 16) | (0x9834 >> 2),
458         0x00000000,
459         (0x0000 << 16) | (0x30f00 >> 2),
460         0x00000000,
461         (0x0001 << 16) | (0x30f00 >> 2),
462         0x00000000,
463         (0x0000 << 16) | (0x30f04 >> 2),
464         0x00000000,
465         (0x0001 << 16) | (0x30f04 >> 2),
466         0x00000000,
467         (0x0000 << 16) | (0x30f08 >> 2),
468         0x00000000,
469         (0x0001 << 16) | (0x30f08 >> 2),
470         0x00000000,
471         (0x0000 << 16) | (0x30f0c >> 2),
472         0x00000000,
473         (0x0001 << 16) | (0x30f0c >> 2),
474         0x00000000,
475         (0x0600 << 16) | (0x9b7c >> 2),
476         0x00000000,
477         (0x0e00 << 16) | (0x8a14 >> 2),
478         0x00000000,
479         (0x0e00 << 16) | (0x8a18 >> 2),
480         0x00000000,
481         (0x0600 << 16) | (0x30a00 >> 2),
482         0x00000000,
483         (0x0e00 << 16) | (0x8bf0 >> 2),
484         0x00000000,
485         (0x0e00 << 16) | (0x8bcc >> 2),
486         0x00000000,
487         (0x0e00 << 16) | (0x8b24 >> 2),
488         0x00000000,
489         (0x0e00 << 16) | (0x30a04 >> 2),
490         0x00000000,
491         (0x0600 << 16) | (0x30a10 >> 2),
492         0x00000000,
493         (0x0600 << 16) | (0x30a14 >> 2),
494         0x00000000,
495         (0x0600 << 16) | (0x30a18 >> 2),
496         0x00000000,
497         (0x0600 << 16) | (0x30a2c >> 2),
498         0x00000000,
499         (0x0e00 << 16) | (0xc700 >> 2),
500         0x00000000,
501         (0x0e00 << 16) | (0xc704 >> 2),
502         0x00000000,
503         (0x0e00 << 16) | (0xc708 >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0xc768 >> 2),
506         0x00000000,
507         (0x0400 << 16) | (0xc770 >> 2),
508         0x00000000,
509         (0x0400 << 16) | (0xc774 >> 2),
510         0x00000000,
511         (0x0400 << 16) | (0xc778 >> 2),
512         0x00000000,
513         (0x0400 << 16) | (0xc77c >> 2),
514         0x00000000,
515         (0x0400 << 16) | (0xc780 >> 2),
516         0x00000000,
517         (0x0400 << 16) | (0xc784 >> 2),
518         0x00000000,
519         (0x0400 << 16) | (0xc788 >> 2),
520         0x00000000,
521         (0x0400 << 16) | (0xc78c >> 2),
522         0x00000000,
523         (0x0400 << 16) | (0xc798 >> 2),
524         0x00000000,
525         (0x0400 << 16) | (0xc79c >> 2),
526         0x00000000,
527         (0x0400 << 16) | (0xc7a0 >> 2),
528         0x00000000,
529         (0x0400 << 16) | (0xc7a4 >> 2),
530         0x00000000,
531         (0x0400 << 16) | (0xc7a8 >> 2),
532         0x00000000,
533         (0x0400 << 16) | (0xc7ac >> 2),
534         0x00000000,
535         (0x0400 << 16) | (0xc7b0 >> 2),
536         0x00000000,
537         (0x0400 << 16) | (0xc7b4 >> 2),
538         0x00000000,
539         (0x0e00 << 16) | (0x9100 >> 2),
540         0x00000000,
541         (0x0e00 << 16) | (0x3c010 >> 2),
542         0x00000000,
543         (0x0e00 << 16) | (0x92a8 >> 2),
544         0x00000000,
545         (0x0e00 << 16) | (0x92ac >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0x92b4 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0x92b8 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0x92bc >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0x92c0 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0x92c4 >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0x92c8 >> 2),
558         0x00000000,
559         (0x0e00 << 16) | (0x92cc >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0x92d0 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0x8c00 >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0x8c04 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0x8c20 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0x8c38 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0x8c3c >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0xae00 >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x9604 >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0xac08 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0xac0c >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0xac10 >> 2),
582         0x00000000,
583         (0x0e00 << 16) | (0xac14 >> 2),
584         0x00000000,
585         (0x0e00 << 16) | (0xac58 >> 2),
586         0x00000000,
587         (0x0e00 << 16) | (0xac68 >> 2),
588         0x00000000,
589         (0x0e00 << 16) | (0xac6c >> 2),
590         0x00000000,
591         (0x0e00 << 16) | (0xac70 >> 2),
592         0x00000000,
593         (0x0e00 << 16) | (0xac74 >> 2),
594         0x00000000,
595         (0x0e00 << 16) | (0xac78 >> 2),
596         0x00000000,
597         (0x0e00 << 16) | (0xac7c >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0xac80 >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0xac84 >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0xac88 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0xac8c >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0x970c >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0x9714 >> 2),
610         0x00000000,
611         (0x0e00 << 16) | (0x9718 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0x971c >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0x31068 >> 2),
616         0x00000000,
617         (0x4e00 << 16) | (0x31068 >> 2),
618         0x00000000,
619         (0x5e00 << 16) | (0x31068 >> 2),
620         0x00000000,
621         (0x6e00 << 16) | (0x31068 >> 2),
622         0x00000000,
623         (0x7e00 << 16) | (0x31068 >> 2),
624         0x00000000,
625         (0x8e00 << 16) | (0x31068 >> 2),
626         0x00000000,
627         (0x9e00 << 16) | (0x31068 >> 2),
628         0x00000000,
629         (0xae00 << 16) | (0x31068 >> 2),
630         0x00000000,
631         (0xbe00 << 16) | (0x31068 >> 2),
632         0x00000000,
633         (0x0e00 << 16) | (0xcd10 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0xcd14 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x88b0 >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x88b4 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x88b8 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x88bc >> 2),
644         0x00000000,
645         (0x0400 << 16) | (0x89c0 >> 2),
646         0x00000000,
647         (0x0e00 << 16) | (0x88c4 >> 2),
648         0x00000000,
649         (0x0e00 << 16) | (0x88c8 >> 2),
650         0x00000000,
651         (0x0e00 << 16) | (0x88d0 >> 2),
652         0x00000000,
653         (0x0e00 << 16) | (0x88d4 >> 2),
654         0x00000000,
655         (0x0e00 << 16) | (0x88d8 >> 2),
656         0x00000000,
657         (0x0e00 << 16) | (0x8980 >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0x30938 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0x3093c >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0x30940 >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0x89a0 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0x30900 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0x30904 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0x89b4 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0x3c210 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0x3c214 >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0x3c218 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0x8904 >> 2),
680         0x00000000,
681         0x5,
682         (0x0e00 << 16) | (0x8c28 >> 2),
683         (0x0e00 << 16) | (0x8c2c >> 2),
684         (0x0e00 << 16) | (0x8c30 >> 2),
685         (0x0e00 << 16) | (0x8c34 >> 2),
686         (0x0e00 << 16) | (0x9600 >> 2),
687 };
688
689 static const u32 kalindi_rlc_save_restore_register_list[] =
690 {
691         (0x0e00 << 16) | (0xc12c >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0xc140 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0xc150 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0xc15c >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0xc168 >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0xc170 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xc204 >> 2),
704         0x00000000,
705         (0x0e00 << 16) | (0xc2b4 >> 2),
706         0x00000000,
707         (0x0e00 << 16) | (0xc2b8 >> 2),
708         0x00000000,
709         (0x0e00 << 16) | (0xc2bc >> 2),
710         0x00000000,
711         (0x0e00 << 16) | (0xc2c0 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0x8228 >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0x829c >> 2),
716         0x00000000,
717         (0x0e00 << 16) | (0x869c >> 2),
718         0x00000000,
719         (0x0600 << 16) | (0x98f4 >> 2),
720         0x00000000,
721         (0x0e00 << 16) | (0x98f8 >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0x9900 >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc260 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0x90e8 >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0x3c000 >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0x3c00c >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0x8c1c >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0x9700 >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0xcd20 >> 2),
738         0x00000000,
739         (0x4e00 << 16) | (0xcd20 >> 2),
740         0x00000000,
741         (0x5e00 << 16) | (0xcd20 >> 2),
742         0x00000000,
743         (0x6e00 << 16) | (0xcd20 >> 2),
744         0x00000000,
745         (0x7e00 << 16) | (0xcd20 >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0x89bc >> 2),
748         0x00000000,
749         (0x0e00 << 16) | (0x8900 >> 2),
750         0x00000000,
751         0x3,
752         (0x0e00 << 16) | (0xc130 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0xc134 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0xc1fc >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0xc208 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0xc264 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0xc268 >> 2),
763         0x00000000,
764         (0x0e00 << 16) | (0xc26c >> 2),
765         0x00000000,
766         (0x0e00 << 16) | (0xc270 >> 2),
767         0x00000000,
768         (0x0e00 << 16) | (0xc274 >> 2),
769         0x00000000,
770         (0x0e00 << 16) | (0xc28c >> 2),
771         0x00000000,
772         (0x0e00 << 16) | (0xc290 >> 2),
773         0x00000000,
774         (0x0e00 << 16) | (0xc294 >> 2),
775         0x00000000,
776         (0x0e00 << 16) | (0xc298 >> 2),
777         0x00000000,
778         (0x0e00 << 16) | (0xc2a0 >> 2),
779         0x00000000,
780         (0x0e00 << 16) | (0xc2a4 >> 2),
781         0x00000000,
782         (0x0e00 << 16) | (0xc2a8 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0xc2ac >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0x301d0 >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0x30238 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0x30250 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0x30254 >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0x30258 >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0x3025c >> 2),
797         0x00000000,
798         (0x4e00 << 16) | (0xc900 >> 2),
799         0x00000000,
800         (0x5e00 << 16) | (0xc900 >> 2),
801         0x00000000,
802         (0x6e00 << 16) | (0xc900 >> 2),
803         0x00000000,
804         (0x7e00 << 16) | (0xc900 >> 2),
805         0x00000000,
806         (0x4e00 << 16) | (0xc904 >> 2),
807         0x00000000,
808         (0x5e00 << 16) | (0xc904 >> 2),
809         0x00000000,
810         (0x6e00 << 16) | (0xc904 >> 2),
811         0x00000000,
812         (0x7e00 << 16) | (0xc904 >> 2),
813         0x00000000,
814         (0x4e00 << 16) | (0xc908 >> 2),
815         0x00000000,
816         (0x5e00 << 16) | (0xc908 >> 2),
817         0x00000000,
818         (0x6e00 << 16) | (0xc908 >> 2),
819         0x00000000,
820         (0x7e00 << 16) | (0xc908 >> 2),
821         0x00000000,
822         (0x4e00 << 16) | (0xc90c >> 2),
823         0x00000000,
824         (0x5e00 << 16) | (0xc90c >> 2),
825         0x00000000,
826         (0x6e00 << 16) | (0xc90c >> 2),
827         0x00000000,
828         (0x7e00 << 16) | (0xc90c >> 2),
829         0x00000000,
830         (0x4e00 << 16) | (0xc910 >> 2),
831         0x00000000,
832         (0x5e00 << 16) | (0xc910 >> 2),
833         0x00000000,
834         (0x6e00 << 16) | (0xc910 >> 2),
835         0x00000000,
836         (0x7e00 << 16) | (0xc910 >> 2),
837         0x00000000,
838         (0x0e00 << 16) | (0xc99c >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0x9834 >> 2),
841         0x00000000,
842         (0x0000 << 16) | (0x30f00 >> 2),
843         0x00000000,
844         (0x0000 << 16) | (0x30f04 >> 2),
845         0x00000000,
846         (0x0000 << 16) | (0x30f08 >> 2),
847         0x00000000,
848         (0x0000 << 16) | (0x30f0c >> 2),
849         0x00000000,
850         (0x0600 << 16) | (0x9b7c >> 2),
851         0x00000000,
852         (0x0e00 << 16) | (0x8a14 >> 2),
853         0x00000000,
854         (0x0e00 << 16) | (0x8a18 >> 2),
855         0x00000000,
856         (0x0600 << 16) | (0x30a00 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x8bf0 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8bcc >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8b24 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x30a04 >> 2),
865         0x00000000,
866         (0x0600 << 16) | (0x30a10 >> 2),
867         0x00000000,
868         (0x0600 << 16) | (0x30a14 >> 2),
869         0x00000000,
870         (0x0600 << 16) | (0x30a18 >> 2),
871         0x00000000,
872         (0x0600 << 16) | (0x30a2c >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0xc700 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0xc704 >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0xc708 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0xc768 >> 2),
881         0x00000000,
882         (0x0400 << 16) | (0xc770 >> 2),
883         0x00000000,
884         (0x0400 << 16) | (0xc774 >> 2),
885         0x00000000,
886         (0x0400 << 16) | (0xc798 >> 2),
887         0x00000000,
888         (0x0400 << 16) | (0xc79c >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0x9100 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0x3c010 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0x8c00 >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0x8c04 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0x8c20 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0x8c38 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0x8c3c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0xae00 >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x9604 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0xac08 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0xac0c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0xac10 >> 2),
913         0x00000000,
914         (0x0e00 << 16) | (0xac14 >> 2),
915         0x00000000,
916         (0x0e00 << 16) | (0xac58 >> 2),
917         0x00000000,
918         (0x0e00 << 16) | (0xac68 >> 2),
919         0x00000000,
920         (0x0e00 << 16) | (0xac6c >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xac70 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xac74 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0xac78 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0xac7c >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0xac80 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0xac84 >> 2),
933         0x00000000,
934         (0x0e00 << 16) | (0xac88 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0xac8c >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0x970c >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0x9714 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0x9718 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0x971c >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x31068 >> 2),
947         0x00000000,
948         (0x4e00 << 16) | (0x31068 >> 2),
949         0x00000000,
950         (0x5e00 << 16) | (0x31068 >> 2),
951         0x00000000,
952         (0x6e00 << 16) | (0x31068 >> 2),
953         0x00000000,
954         (0x7e00 << 16) | (0x31068 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0xcd10 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0xcd14 >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x88b0 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x88b4 >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x88b8 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x88bc >> 2),
967         0x00000000,
968         (0x0400 << 16) | (0x89c0 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x88c4 >> 2),
971         0x00000000,
972         (0x0e00 << 16) | (0x88c8 >> 2),
973         0x00000000,
974         (0x0e00 << 16) | (0x88d0 >> 2),
975         0x00000000,
976         (0x0e00 << 16) | (0x88d4 >> 2),
977         0x00000000,
978         (0x0e00 << 16) | (0x88d8 >> 2),
979         0x00000000,
980         (0x0e00 << 16) | (0x8980 >> 2),
981         0x00000000,
982         (0x0e00 << 16) | (0x30938 >> 2),
983         0x00000000,
984         (0x0e00 << 16) | (0x3093c >> 2),
985         0x00000000,
986         (0x0e00 << 16) | (0x30940 >> 2),
987         0x00000000,
988         (0x0e00 << 16) | (0x89a0 >> 2),
989         0x00000000,
990         (0x0e00 << 16) | (0x30900 >> 2),
991         0x00000000,
992         (0x0e00 << 16) | (0x30904 >> 2),
993         0x00000000,
994         (0x0e00 << 16) | (0x89b4 >> 2),
995         0x00000000,
996         (0x0e00 << 16) | (0x3e1fc >> 2),
997         0x00000000,
998         (0x0e00 << 16) | (0x3c210 >> 2),
999         0x00000000,
1000         (0x0e00 << 16) | (0x3c214 >> 2),
1001         0x00000000,
1002         (0x0e00 << 16) | (0x3c218 >> 2),
1003         0x00000000,
1004         (0x0e00 << 16) | (0x8904 >> 2),
1005         0x00000000,
1006         0x5,
1007         (0x0e00 << 16) | (0x8c28 >> 2),
1008         (0x0e00 << 16) | (0x8c2c >> 2),
1009         (0x0e00 << 16) | (0x8c30 >> 2),
1010         (0x0e00 << 16) | (0x8c34 >> 2),
1011         (0x0e00 << 16) | (0x9600 >> 2),
1012 };
1013
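/*
 * "Golden" register tables.  Each entry is an {offset, and-mask, or-value}
 * triplet consumed by radeon_program_register_sequence(), which rewrites
 * only the masked bits of the register with the supplied value.
 */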
1014 static const u32 bonaire_golden_spm_registers[] =
1015 {
1016         0x30800, 0xe0ffffff, 0xe0000000
1017 };
1018
1019 static const u32 bonaire_golden_common_registers[] =
1020 {
1021         0xc770, 0xffffffff, 0x00000800,
1022         0xc774, 0xffffffff, 0x00000800,
1023         0xc798, 0xffffffff, 0x00007fbf,
1024         0xc79c, 0xffffffff, 0x00007faf
1025 };
1026
1027 static const u32 bonaire_golden_registers[] =
1028 {
1029         0x3354, 0x00000333, 0x00000333,
1030         0x3350, 0x000c0fc0, 0x00040200,
1031         0x9a10, 0x00010000, 0x00058208,
1032         0x3c000, 0xffff1fff, 0x00140000,
1033         0x3c200, 0xfdfc0fff, 0x00000100,
1034         0x3c234, 0x40000000, 0x40000200,
1035         0x9830, 0xffffffff, 0x00000000,
1036         0x9834, 0xf00fffff, 0x00000400,
1037         0x9838, 0x0002021c, 0x00020200,
1038         0xc78, 0x00000080, 0x00000000,
1039         0x5bb0, 0x000000f0, 0x00000070,
1040         0x5bc0, 0xf0311fff, 0x80300000,
1041         0x98f8, 0x73773777, 0x12010001,
1042         0x350c, 0x00810000, 0x408af000,
1043         0x7030, 0x31000111, 0x00000011,
1044         0x2f48, 0x73773777, 0x12010001,
1045         0x220c, 0x00007fb6, 0x0021a1b1,
1046         0x2210, 0x00007fb6, 0x002021b1,
1047         0x2180, 0x00007fb6, 0x00002191,
1048         0x2218, 0x00007fb6, 0x002121b1,
1049         0x221c, 0x00007fb6, 0x002021b1,
1050         0x21dc, 0x00007fb6, 0x00002191,
1051         0x21e0, 0x00007fb6, 0x00002191,
1052         0x3628, 0x0000003f, 0x0000000a,
1053         0x362c, 0x0000003f, 0x0000000a,
1054         0x2ae4, 0x00073ffe, 0x000022a2,
1055         0x240c, 0x000007ff, 0x00000000,
1056         0x8a14, 0xf000003f, 0x00000007,
1057         0x8bf0, 0x00002001, 0x00000001,
1058         0x8b24, 0xffffffff, 0x00ffffff,
1059         0x30a04, 0x0000ff0f, 0x00000000,
1060         0x28a4c, 0x07ffffff, 0x06000000,
1061         0x4d8, 0x00000fff, 0x00000100,
1062         0x3e78, 0x00000001, 0x00000002,
1063         0x9100, 0x03000000, 0x0362c688,
1064         0x8c00, 0x000000ff, 0x00000001,
1065         0xe40, 0x00001fff, 0x00001fff,
1066         0x9060, 0x0000007f, 0x00000020,
1067         0x9508, 0x00010000, 0x00010000,
1068         0xac14, 0x000003ff, 0x000000f3,
1069         0xac0c, 0xffffffff, 0x00001032
1070 };
1071
1072 static const u32 bonaire_mgcg_cgcg_init[] =
1073 {
1074         0xc420, 0xffffffff, 0xfffffffc,
1075         0x30800, 0xffffffff, 0xe0000000,
1076         0x3c2a0, 0xffffffff, 0x00000100,
1077         0x3c208, 0xffffffff, 0x00000100,
1078         0x3c2c0, 0xffffffff, 0xc0000100,
1079         0x3c2c8, 0xffffffff, 0xc0000100,
1080         0x3c2c4, 0xffffffff, 0xc0000100,
1081         0x55e4, 0xffffffff, 0x00600100,
1082         0x3c280, 0xffffffff, 0x00000100,
1083         0x3c214, 0xffffffff, 0x06000100,
1084         0x3c220, 0xffffffff, 0x00000100,
1085         0x3c218, 0xffffffff, 0x06000100,
1086         0x3c204, 0xffffffff, 0x00000100,
1087         0x3c2e0, 0xffffffff, 0x00000100,
1088         0x3c224, 0xffffffff, 0x00000100,
1089         0x3c200, 0xffffffff, 0x00000100,
1090         0x3c230, 0xffffffff, 0x00000100,
1091         0x3c234, 0xffffffff, 0x00000100,
1092         0x3c250, 0xffffffff, 0x00000100,
1093         0x3c254, 0xffffffff, 0x00000100,
1094         0x3c258, 0xffffffff, 0x00000100,
1095         0x3c25c, 0xffffffff, 0x00000100,
1096         0x3c260, 0xffffffff, 0x00000100,
1097         0x3c27c, 0xffffffff, 0x00000100,
1098         0x3c278, 0xffffffff, 0x00000100,
1099         0x3c210, 0xffffffff, 0x06000100,
1100         0x3c290, 0xffffffff, 0x00000100,
1101         0x3c274, 0xffffffff, 0x00000100,
1102         0x3c2b4, 0xffffffff, 0x00000100,
1103         0x3c2b0, 0xffffffff, 0x00000100,
1104         0x3c270, 0xffffffff, 0x00000100,
1105         0x30800, 0xffffffff, 0xe0000000,
1106         0x3c020, 0xffffffff, 0x00010000,
1107         0x3c024, 0xffffffff, 0x00030002,
1108         0x3c028, 0xffffffff, 0x00040007,
1109         0x3c02c, 0xffffffff, 0x00060005,
1110         0x3c030, 0xffffffff, 0x00090008,
1111         0x3c034, 0xffffffff, 0x00010000,
1112         0x3c038, 0xffffffff, 0x00030002,
1113         0x3c03c, 0xffffffff, 0x00040007,
1114         0x3c040, 0xffffffff, 0x00060005,
1115         0x3c044, 0xffffffff, 0x00090008,
1116         0x3c048, 0xffffffff, 0x00010000,
1117         0x3c04c, 0xffffffff, 0x00030002,
1118         0x3c050, 0xffffffff, 0x00040007,
1119         0x3c054, 0xffffffff, 0x00060005,
1120         0x3c058, 0xffffffff, 0x00090008,
1121         0x3c05c, 0xffffffff, 0x00010000,
1122         0x3c060, 0xffffffff, 0x00030002,
1123         0x3c064, 0xffffffff, 0x00040007,
1124         0x3c068, 0xffffffff, 0x00060005,
1125         0x3c06c, 0xffffffff, 0x00090008,
1126         0x3c070, 0xffffffff, 0x00010000,
1127         0x3c074, 0xffffffff, 0x00030002,
1128         0x3c078, 0xffffffff, 0x00040007,
1129         0x3c07c, 0xffffffff, 0x00060005,
1130         0x3c080, 0xffffffff, 0x00090008,
1131         0x3c084, 0xffffffff, 0x00010000,
1132         0x3c088, 0xffffffff, 0x00030002,
1133         0x3c08c, 0xffffffff, 0x00040007,
1134         0x3c090, 0xffffffff, 0x00060005,
1135         0x3c094, 0xffffffff, 0x00090008,
1136         0x3c098, 0xffffffff, 0x00010000,
1137         0x3c09c, 0xffffffff, 0x00030002,
1138         0x3c0a0, 0xffffffff, 0x00040007,
1139         0x3c0a4, 0xffffffff, 0x00060005,
1140         0x3c0a8, 0xffffffff, 0x00090008,
1141         0x3c000, 0xffffffff, 0x96e00200,
1142         0x8708, 0xffffffff, 0x00900100,
1143         0xc424, 0xffffffff, 0x0020003f,
1144         0x38, 0xffffffff, 0x0140001c,
1145         0x3c, 0x000f0000, 0x000f0000,
1146         0x220, 0xffffffff, 0xC060000C,
1147         0x224, 0xc0000fff, 0x00000100,
1148         0xf90, 0xffffffff, 0x00000100,
1149         0xf98, 0x00000101, 0x00000000,
1150         0x20a8, 0xffffffff, 0x00000104,
1151         0x55e4, 0xff000fff, 0x00000100,
1152         0x30cc, 0xc0000fff, 0x00000104,
1153         0xc1e4, 0x00000001, 0x00000001,
1154         0xd00c, 0xff000ff0, 0x00000100,
1155         0xd80c, 0xff000ff0, 0x00000100
1156 };
1157
1158 static const u32 spectre_golden_spm_registers[] =
1159 {
1160         0x30800, 0xe0ffffff, 0xe0000000
1161 };
1162
1163 static const u32 spectre_golden_common_registers[] =
1164 {
1165         0xc770, 0xffffffff, 0x00000800,
1166         0xc774, 0xffffffff, 0x00000800,
1167         0xc798, 0xffffffff, 0x00007fbf,
1168         0xc79c, 0xffffffff, 0x00007faf
1169 };
1170
1171 static const u32 spectre_golden_registers[] =
1172 {
1173         0x3c000, 0xffff1fff, 0x96940200,
1174         0x3c00c, 0xffff0001, 0xff000000,
1175         0x3c200, 0xfffc0fff, 0x00000100,
1176         0x6ed8, 0x00010101, 0x00010000,
1177         0x9834, 0xf00fffff, 0x00000400,
1178         0x9838, 0xfffffffc, 0x00020200,
1179         0x5bb0, 0x000000f0, 0x00000070,
1180         0x5bc0, 0xf0311fff, 0x80300000,
1181         0x98f8, 0x73773777, 0x12010001,
1182         0x9b7c, 0x00ff0000, 0x00fc0000,
1183         0x2f48, 0x73773777, 0x12010001,
1184         0x8a14, 0xf000003f, 0x00000007,
1185         0x8b24, 0xffffffff, 0x00ffffff,
1186         0x28350, 0x3f3f3fff, 0x00000082,
1187         0x28354, 0x0000003f, 0x00000000,
1188         0x3e78, 0x00000001, 0x00000002,
1189         0x913c, 0xffff03df, 0x00000004,
1190         0xc768, 0x00000008, 0x00000008,
1191         0x8c00, 0x000008ff, 0x00000800,
1192         0x9508, 0x00010000, 0x00010000,
1193         0xac0c, 0xffffffff, 0x54763210,
1194         0x214f8, 0x01ff01ff, 0x00000002,
1195         0x21498, 0x007ff800, 0x00200000,
1196         0x2015c, 0xffffffff, 0x00000f40,
1197         0x30934, 0xffffffff, 0x00000001
1198 };
1199
1200 static const u32 spectre_mgcg_cgcg_init[] =
1201 {
1202         0xc420, 0xffffffff, 0xfffffffc,
1203         0x30800, 0xffffffff, 0xe0000000,
1204         0x3c2a0, 0xffffffff, 0x00000100,
1205         0x3c208, 0xffffffff, 0x00000100,
1206         0x3c2c0, 0xffffffff, 0x00000100,
1207         0x3c2c8, 0xffffffff, 0x00000100,
1208         0x3c2c4, 0xffffffff, 0x00000100,
1209         0x55e4, 0xffffffff, 0x00600100,
1210         0x3c280, 0xffffffff, 0x00000100,
1211         0x3c214, 0xffffffff, 0x06000100,
1212         0x3c220, 0xffffffff, 0x00000100,
1213         0x3c218, 0xffffffff, 0x06000100,
1214         0x3c204, 0xffffffff, 0x00000100,
1215         0x3c2e0, 0xffffffff, 0x00000100,
1216         0x3c224, 0xffffffff, 0x00000100,
1217         0x3c200, 0xffffffff, 0x00000100,
1218         0x3c230, 0xffffffff, 0x00000100,
1219         0x3c234, 0xffffffff, 0x00000100,
1220         0x3c250, 0xffffffff, 0x00000100,
1221         0x3c254, 0xffffffff, 0x00000100,
1222         0x3c258, 0xffffffff, 0x00000100,
1223         0x3c25c, 0xffffffff, 0x00000100,
1224         0x3c260, 0xffffffff, 0x00000100,
1225         0x3c27c, 0xffffffff, 0x00000100,
1226         0x3c278, 0xffffffff, 0x00000100,
1227         0x3c210, 0xffffffff, 0x06000100,
1228         0x3c290, 0xffffffff, 0x00000100,
1229         0x3c274, 0xffffffff, 0x00000100,
1230         0x3c2b4, 0xffffffff, 0x00000100,
1231         0x3c2b0, 0xffffffff, 0x00000100,
1232         0x3c270, 0xffffffff, 0x00000100,
1233         0x30800, 0xffffffff, 0xe0000000,
1234         0x3c020, 0xffffffff, 0x00010000,
1235         0x3c024, 0xffffffff, 0x00030002,
1236         0x3c028, 0xffffffff, 0x00040007,
1237         0x3c02c, 0xffffffff, 0x00060005,
1238         0x3c030, 0xffffffff, 0x00090008,
1239         0x3c034, 0xffffffff, 0x00010000,
1240         0x3c038, 0xffffffff, 0x00030002,
1241         0x3c03c, 0xffffffff, 0x00040007,
1242         0x3c040, 0xffffffff, 0x00060005,
1243         0x3c044, 0xffffffff, 0x00090008,
1244         0x3c048, 0xffffffff, 0x00010000,
1245         0x3c04c, 0xffffffff, 0x00030002,
1246         0x3c050, 0xffffffff, 0x00040007,
1247         0x3c054, 0xffffffff, 0x00060005,
1248         0x3c058, 0xffffffff, 0x00090008,
1249         0x3c05c, 0xffffffff, 0x00010000,
1250         0x3c060, 0xffffffff, 0x00030002,
1251         0x3c064, 0xffffffff, 0x00040007,
1252         0x3c068, 0xffffffff, 0x00060005,
1253         0x3c06c, 0xffffffff, 0x00090008,
1254         0x3c070, 0xffffffff, 0x00010000,
1255         0x3c074, 0xffffffff, 0x00030002,
1256         0x3c078, 0xffffffff, 0x00040007,
1257         0x3c07c, 0xffffffff, 0x00060005,
1258         0x3c080, 0xffffffff, 0x00090008,
1259         0x3c084, 0xffffffff, 0x00010000,
1260         0x3c088, 0xffffffff, 0x00030002,
1261         0x3c08c, 0xffffffff, 0x00040007,
1262         0x3c090, 0xffffffff, 0x00060005,
1263         0x3c094, 0xffffffff, 0x00090008,
1264         0x3c098, 0xffffffff, 0x00010000,
1265         0x3c09c, 0xffffffff, 0x00030002,
1266         0x3c0a0, 0xffffffff, 0x00040007,
1267         0x3c0a4, 0xffffffff, 0x00060005,
1268         0x3c0a8, 0xffffffff, 0x00090008,
1269         0x3c0ac, 0xffffffff, 0x00010000,
1270         0x3c0b0, 0xffffffff, 0x00030002,
1271         0x3c0b4, 0xffffffff, 0x00040007,
1272         0x3c0b8, 0xffffffff, 0x00060005,
1273         0x3c0bc, 0xffffffff, 0x00090008,
1274         0x3c000, 0xffffffff, 0x96e00200,
1275         0x8708, 0xffffffff, 0x00900100,
1276         0xc424, 0xffffffff, 0x0020003f,
1277         0x38, 0xffffffff, 0x0140001c,
1278         0x3c, 0x000f0000, 0x000f0000,
1279         0x220, 0xffffffff, 0xC060000C,
1280         0x224, 0xc0000fff, 0x00000100,
1281         0xf90, 0xffffffff, 0x00000100,
1282         0xf98, 0x00000101, 0x00000000,
1283         0x20a8, 0xffffffff, 0x00000104,
1284         0x55e4, 0xff000fff, 0x00000100,
1285         0x30cc, 0xc0000fff, 0x00000104,
1286         0xc1e4, 0x00000001, 0x00000001,
1287         0xd00c, 0xff000ff0, 0x00000100,
1288         0xd80c, 0xff000ff0, 0x00000100
1289 };
1290
1291 static const u32 kalindi_golden_spm_registers[] =
1292 {
1293         0x30800, 0xe0ffffff, 0xe0000000
1294 };
1295
1296 static const u32 kalindi_golden_common_registers[] =
1297 {
1298         0xc770, 0xffffffff, 0x00000800,
1299         0xc774, 0xffffffff, 0x00000800,
1300         0xc798, 0xffffffff, 0x00007fbf,
1301         0xc79c, 0xffffffff, 0x00007faf
1302 };
1303
1304 static const u32 kalindi_golden_registers[] =
1305 {
1306         0x3c000, 0xffffdfff, 0x6e944040,
1307         0x55e4, 0xff607fff, 0xfc000100,
1308         0x3c220, 0xff000fff, 0x00000100,
1309         0x3c224, 0xff000fff, 0x00000100,
1310         0x3c200, 0xfffc0fff, 0x00000100,
1311         0x6ed8, 0x00010101, 0x00010000,
1312         0x9830, 0xffffffff, 0x00000000,
1313         0x9834, 0xf00fffff, 0x00000400,
1314         0x5bb0, 0x000000f0, 0x00000070,
1315         0x5bc0, 0xf0311fff, 0x80300000,
1316         0x98f8, 0x73773777, 0x12010001,
1317         0x98fc, 0xffffffff, 0x00000010,
1318         0x9b7c, 0x00ff0000, 0x00fc0000,
1319         0x8030, 0x00001f0f, 0x0000100a,
1320         0x2f48, 0x73773777, 0x12010001,
1321         0x2408, 0x000fffff, 0x000c007f,
1322         0x8a14, 0xf000003f, 0x00000007,
1323         0x8b24, 0x3fff3fff, 0x00ffcfff,
1324         0x30a04, 0x0000ff0f, 0x00000000,
1325         0x28a4c, 0x07ffffff, 0x06000000,
1326         0x4d8, 0x00000fff, 0x00000100,
1327         0x3e78, 0x00000001, 0x00000002,
1328         0xc768, 0x00000008, 0x00000008,
1329         0x8c00, 0x000000ff, 0x00000003,
1330         0x214f8, 0x01ff01ff, 0x00000002,
1331         0x21498, 0x007ff800, 0x00200000,
1332         0x2015c, 0xffffffff, 0x00000f40,
1333         0x88c4, 0x001f3ae3, 0x00000082,
1334         0x88d4, 0x0000001f, 0x00000010,
1335         0x30934, 0xffffffff, 0x00000000
1336 };
1337
1338 static const u32 kalindi_mgcg_cgcg_init[] =
1339 {
1340         0xc420, 0xffffffff, 0xfffffffc,
1341         0x30800, 0xffffffff, 0xe0000000,
1342         0x3c2a0, 0xffffffff, 0x00000100,
1343         0x3c208, 0xffffffff, 0x00000100,
1344         0x3c2c0, 0xffffffff, 0x00000100,
1345         0x3c2c8, 0xffffffff, 0x00000100,
1346         0x3c2c4, 0xffffffff, 0x00000100,
1347         0x55e4, 0xffffffff, 0x00600100,
1348         0x3c280, 0xffffffff, 0x00000100,
1349         0x3c214, 0xffffffff, 0x06000100,
1350         0x3c220, 0xffffffff, 0x00000100,
1351         0x3c218, 0xffffffff, 0x06000100,
1352         0x3c204, 0xffffffff, 0x00000100,
1353         0x3c2e0, 0xffffffff, 0x00000100,
1354         0x3c224, 0xffffffff, 0x00000100,
1355         0x3c200, 0xffffffff, 0x00000100,
1356         0x3c230, 0xffffffff, 0x00000100,
1357         0x3c234, 0xffffffff, 0x00000100,
1358         0x3c250, 0xffffffff, 0x00000100,
1359         0x3c254, 0xffffffff, 0x00000100,
1360         0x3c258, 0xffffffff, 0x00000100,
1361         0x3c25c, 0xffffffff, 0x00000100,
1362         0x3c260, 0xffffffff, 0x00000100,
1363         0x3c27c, 0xffffffff, 0x00000100,
1364         0x3c278, 0xffffffff, 0x00000100,
1365         0x3c210, 0xffffffff, 0x06000100,
1366         0x3c290, 0xffffffff, 0x00000100,
1367         0x3c274, 0xffffffff, 0x00000100,
1368         0x3c2b4, 0xffffffff, 0x00000100,
1369         0x3c2b0, 0xffffffff, 0x00000100,
1370         0x3c270, 0xffffffff, 0x00000100,
1371         0x30800, 0xffffffff, 0xe0000000,
1372         0x3c020, 0xffffffff, 0x00010000,
1373         0x3c024, 0xffffffff, 0x00030002,
1374         0x3c028, 0xffffffff, 0x00040007,
1375         0x3c02c, 0xffffffff, 0x00060005,
1376         0x3c030, 0xffffffff, 0x00090008,
1377         0x3c034, 0xffffffff, 0x00010000,
1378         0x3c038, 0xffffffff, 0x00030002,
1379         0x3c03c, 0xffffffff, 0x00040007,
1380         0x3c040, 0xffffffff, 0x00060005,
1381         0x3c044, 0xffffffff, 0x00090008,
1382         0x3c000, 0xffffffff, 0x96e00200,
1383         0x8708, 0xffffffff, 0x00900100,
1384         0xc424, 0xffffffff, 0x0020003f,
1385         0x38, 0xffffffff, 0x0140001c,
1386         0x3c, 0x000f0000, 0x000f0000,
1387         0x220, 0xffffffff, 0xC060000C,
1388         0x224, 0xc0000fff, 0x00000100,
1389         0x20a8, 0xffffffff, 0x00000104,
1390         0x55e4, 0xff000fff, 0x00000100,
1391         0x30cc, 0xc0000fff, 0x00000104,
1392         0xc1e4, 0x00000001, 0x00000001,
1393         0xd00c, 0xff000ff0, 0x00000100,
1394         0xd80c, 0xff000ff0, 0x00000100
1395 };
1396
1397 static const u32 hawaii_golden_spm_registers[] =
1398 {
1399         0x30800, 0xe0ffffff, 0xe0000000
1400 };
1401
1402 static const u32 hawaii_golden_common_registers[] =
1403 {
1404         0x30800, 0xffffffff, 0xe0000000,
1405         0x28350, 0xffffffff, 0x3a00161a,
1406         0x28354, 0xffffffff, 0x0000002e,
1407         0x9a10, 0xffffffff, 0x00018208,
1408         0x98f8, 0xffffffff, 0x12011003
1409 };
1410
1411 static const u32 hawaii_golden_registers[] =
1412 {
1413         0x3354, 0x00000333, 0x00000333,
1414         0x9a10, 0x00010000, 0x00058208,
1415         0x9830, 0xffffffff, 0x00000000,
1416         0x9834, 0xf00fffff, 0x00000400,
1417         0x9838, 0x0002021c, 0x00020200,
1418         0xc78, 0x00000080, 0x00000000,
1419         0x5bb0, 0x000000f0, 0x00000070,
1420         0x5bc0, 0xf0311fff, 0x80300000,
1421         0x350c, 0x00810000, 0x408af000,
1422         0x7030, 0x31000111, 0x00000011,
1423         0x2f48, 0x73773777, 0x12010001,
1424         0x2120, 0x0000007f, 0x0000001b,
1425         0x21dc, 0x00007fb6, 0x00002191,
1426         0x3628, 0x0000003f, 0x0000000a,
1427         0x362c, 0x0000003f, 0x0000000a,
1428         0x2ae4, 0x00073ffe, 0x000022a2,
1429         0x240c, 0x000007ff, 0x00000000,
1430         0x8bf0, 0x00002001, 0x00000001,
1431         0x8b24, 0xffffffff, 0x00ffffff,
1432         0x30a04, 0x0000ff0f, 0x00000000,
1433         0x28a4c, 0x07ffffff, 0x06000000,
1434         0x3e78, 0x00000001, 0x00000002,
1435         0xc768, 0x00000008, 0x00000008,
1436         0xc770, 0x00000f00, 0x00000800,
1437         0xc774, 0x00000f00, 0x00000800,
1438         0xc798, 0x00ffffff, 0x00ff7fbf,
1439         0xc79c, 0x00ffffff, 0x00ff7faf,
1440         0x8c00, 0x000000ff, 0x00000800,
1441         0xe40, 0x00001fff, 0x00001fff,
1442         0x9060, 0x0000007f, 0x00000020,
1443         0x9508, 0x00010000, 0x00010000,
1444         0xae00, 0x00100000, 0x000ff07c,
1445         0xac14, 0x000003ff, 0x0000000f,
1446         0xac10, 0xffffffff, 0x7564fdec,
1447         0xac0c, 0xffffffff, 0x3120b9a8,
1448         0xac08, 0x20000000, 0x0f9c0000
1449 };
1450
1451 static const u32 hawaii_mgcg_cgcg_init[] =
1452 {
1453         0xc420, 0xffffffff, 0xfffffffd,
1454         0x30800, 0xffffffff, 0xe0000000,
1455         0x3c2a0, 0xffffffff, 0x00000100,
1456         0x3c208, 0xffffffff, 0x00000100,
1457         0x3c2c0, 0xffffffff, 0x00000100,
1458         0x3c2c8, 0xffffffff, 0x00000100,
1459         0x3c2c4, 0xffffffff, 0x00000100,
1460         0x55e4, 0xffffffff, 0x00200100,
1461         0x3c280, 0xffffffff, 0x00000100,
1462         0x3c214, 0xffffffff, 0x06000100,
1463         0x3c220, 0xffffffff, 0x00000100,
1464         0x3c218, 0xffffffff, 0x06000100,
1465         0x3c204, 0xffffffff, 0x00000100,
1466         0x3c2e0, 0xffffffff, 0x00000100,
1467         0x3c224, 0xffffffff, 0x00000100,
1468         0x3c200, 0xffffffff, 0x00000100,
1469         0x3c230, 0xffffffff, 0x00000100,
1470         0x3c234, 0xffffffff, 0x00000100,
1471         0x3c250, 0xffffffff, 0x00000100,
1472         0x3c254, 0xffffffff, 0x00000100,
1473         0x3c258, 0xffffffff, 0x00000100,
1474         0x3c25c, 0xffffffff, 0x00000100,
1475         0x3c260, 0xffffffff, 0x00000100,
1476         0x3c27c, 0xffffffff, 0x00000100,
1477         0x3c278, 0xffffffff, 0x00000100,
1478         0x3c210, 0xffffffff, 0x06000100,
1479         0x3c290, 0xffffffff, 0x00000100,
1480         0x3c274, 0xffffffff, 0x00000100,
1481         0x3c2b4, 0xffffffff, 0x00000100,
1482         0x3c2b0, 0xffffffff, 0x00000100,
1483         0x3c270, 0xffffffff, 0x00000100,
1484         0x30800, 0xffffffff, 0xe0000000,
1485         0x3c020, 0xffffffff, 0x00010000,
1486         0x3c024, 0xffffffff, 0x00030002,
1487         0x3c028, 0xffffffff, 0x00040007,
1488         0x3c02c, 0xffffffff, 0x00060005,
1489         0x3c030, 0xffffffff, 0x00090008,
1490         0x3c034, 0xffffffff, 0x00010000,
1491         0x3c038, 0xffffffff, 0x00030002,
1492         0x3c03c, 0xffffffff, 0x00040007,
1493         0x3c040, 0xffffffff, 0x00060005,
1494         0x3c044, 0xffffffff, 0x00090008,
1495         0x3c048, 0xffffffff, 0x00010000,
1496         0x3c04c, 0xffffffff, 0x00030002,
1497         0x3c050, 0xffffffff, 0x00040007,
1498         0x3c054, 0xffffffff, 0x00060005,
1499         0x3c058, 0xffffffff, 0x00090008,
1500         0x3c05c, 0xffffffff, 0x00010000,
1501         0x3c060, 0xffffffff, 0x00030002,
1502         0x3c064, 0xffffffff, 0x00040007,
1503         0x3c068, 0xffffffff, 0x00060005,
1504         0x3c06c, 0xffffffff, 0x00090008,
1505         0x3c070, 0xffffffff, 0x00010000,
1506         0x3c074, 0xffffffff, 0x00030002,
1507         0x3c078, 0xffffffff, 0x00040007,
1508         0x3c07c, 0xffffffff, 0x00060005,
1509         0x3c080, 0xffffffff, 0x00090008,
1510         0x3c084, 0xffffffff, 0x00010000,
1511         0x3c088, 0xffffffff, 0x00030002,
1512         0x3c08c, 0xffffffff, 0x00040007,
1513         0x3c090, 0xffffffff, 0x00060005,
1514         0x3c094, 0xffffffff, 0x00090008,
1515         0x3c098, 0xffffffff, 0x00010000,
1516         0x3c09c, 0xffffffff, 0x00030002,
1517         0x3c0a0, 0xffffffff, 0x00040007,
1518         0x3c0a4, 0xffffffff, 0x00060005,
1519         0x3c0a8, 0xffffffff, 0x00090008,
1520         0x3c0ac, 0xffffffff, 0x00010000,
1521         0x3c0b0, 0xffffffff, 0x00030002,
1522         0x3c0b4, 0xffffffff, 0x00040007,
1523         0x3c0b8, 0xffffffff, 0x00060005,
1524         0x3c0bc, 0xffffffff, 0x00090008,
1525         0x3c0c0, 0xffffffff, 0x00010000,
1526         0x3c0c4, 0xffffffff, 0x00030002,
1527         0x3c0c8, 0xffffffff, 0x00040007,
1528         0x3c0cc, 0xffffffff, 0x00060005,
1529         0x3c0d0, 0xffffffff, 0x00090008,
1530         0x3c0d4, 0xffffffff, 0x00010000,
1531         0x3c0d8, 0xffffffff, 0x00030002,
1532         0x3c0dc, 0xffffffff, 0x00040007,
1533         0x3c0e0, 0xffffffff, 0x00060005,
1534         0x3c0e4, 0xffffffff, 0x00090008,
1535         0x3c0e8, 0xffffffff, 0x00010000,
1536         0x3c0ec, 0xffffffff, 0x00030002,
1537         0x3c0f0, 0xffffffff, 0x00040007,
1538         0x3c0f4, 0xffffffff, 0x00060005,
1539         0x3c0f8, 0xffffffff, 0x00090008,
1540         0xc318, 0xffffffff, 0x00020200,
1541         0x3350, 0xffffffff, 0x00000200,
1542         0x15c0, 0xffffffff, 0x00000400,
1543         0x55e8, 0xffffffff, 0x00000000,
1544         0x2f50, 0xffffffff, 0x00000902,
1545         0x3c000, 0xffffffff, 0x96940200,
1546         0x8708, 0xffffffff, 0x00900100,
1547         0xc424, 0xffffffff, 0x0020003f,
1548         0x38, 0xffffffff, 0x0140001c,
1549         0x3c, 0x000f0000, 0x000f0000,
1550         0x220, 0xffffffff, 0xc060000c,
1551         0x224, 0xc0000fff, 0x00000100,
1552         0xf90, 0xffffffff, 0x00000100,
1553         0xf98, 0x00000101, 0x00000000,
1554         0x20a8, 0xffffffff, 0x00000104,
1555         0x55e4, 0xff000fff, 0x00000100,
1556         0x30cc, 0xc0000fff, 0x00000104,
1557         0xc1e4, 0x00000001, 0x00000001,
1558         0xd00c, 0xff000ff0, 0x00000100,
1559         0xd80c, 0xff000ff0, 0x00000100
1560 };
1561
1562 static const u32 godavari_golden_registers[] =
1563 {
1564         0x55e4, 0xff607fff, 0xfc000100,
1565         0x6ed8, 0x00010101, 0x00010000,
1566         0x9830, 0xffffffff, 0x00000000,
1567         0x98302, 0xf00fffff, 0x00000400,
1568         0x6130, 0xffffffff, 0x00010000,
1569         0x5bb0, 0x000000f0, 0x00000070,
1570         0x5bc0, 0xf0311fff, 0x80300000,
1571         0x98f8, 0x73773777, 0x12010001,
1572         0x98fc, 0xffffffff, 0x00000010,
1573         0x8030, 0x00001f0f, 0x0000100a,
1574         0x2f48, 0x73773777, 0x12010001,
1575         0x2408, 0x000fffff, 0x000c007f,
1576         0x8a14, 0xf000003f, 0x00000007,
1577         0x8b24, 0xffffffff, 0x00ff0fff,
1578         0x30a04, 0x0000ff0f, 0x00000000,
1579         0x28a4c, 0x07ffffff, 0x06000000,
1580         0x4d8, 0x00000fff, 0x00000100,
1581         0xd014, 0x00010000, 0x00810001,
1582         0xd814, 0x00010000, 0x00810001,
1583         0x3e78, 0x00000001, 0x00000002,
1584         0xc768, 0x00000008, 0x00000008,
1585         0xc770, 0x00000f00, 0x00000800,
1586         0xc774, 0x00000f00, 0x00000800,
1587         0xc798, 0x00ffffff, 0x00ff7fbf,
1588         0xc79c, 0x00ffffff, 0x00ff7faf,
1589         0x8c00, 0x000000ff, 0x00000001,
1590         0x214f8, 0x01ff01ff, 0x00000002,
1591         0x21498, 0x007ff800, 0x00200000,
1592         0x2015c, 0xffffffff, 0x00000f40,
1593         0x88c4, 0x001f3ae3, 0x00000082,
1594         0x88d4, 0x0000001f, 0x00000010,
1595         0x30934, 0xffffffff, 0x00000000
1596 };
1597
1598
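/*
 * A brief note on the tables above (descriptive only): each mgcg/cgcg init
 * and "golden register" table is a flat list of {register offset, mask,
 * value} triplets.  cik_init_golden_registers() below pushes the per-ASIC
 * tables through radeon_program_register_sequence(), which, roughly,
 * writes full-mask (0xffffffff) entries directly and read-modify-writes
 * the rest under the given mask.
 */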
1599 static void cik_init_golden_registers(struct radeon_device *rdev)
1600 {
1601         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1602         mutex_lock(&rdev->grbm_idx_mutex);
1603         switch (rdev->family) {
1604         case CHIP_BONAIRE:
1605                 radeon_program_register_sequence(rdev,
1606                                                  bonaire_mgcg_cgcg_init,
1607                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1608                 radeon_program_register_sequence(rdev,
1609                                                  bonaire_golden_registers,
1610                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1611                 radeon_program_register_sequence(rdev,
1612                                                  bonaire_golden_common_registers,
1613                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1614                 radeon_program_register_sequence(rdev,
1615                                                  bonaire_golden_spm_registers,
1616                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1617                 break;
1618         case CHIP_KABINI:
1619                 radeon_program_register_sequence(rdev,
1620                                                  kalindi_mgcg_cgcg_init,
1621                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1622                 radeon_program_register_sequence(rdev,
1623                                                  kalindi_golden_registers,
1624                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1625                 radeon_program_register_sequence(rdev,
1626                                                  kalindi_golden_common_registers,
1627                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1628                 radeon_program_register_sequence(rdev,
1629                                                  kalindi_golden_spm_registers,
1630                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1631                 break;
1632         case CHIP_MULLINS:
1633                 radeon_program_register_sequence(rdev,
1634                                                  kalindi_mgcg_cgcg_init,
1635                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1636                 radeon_program_register_sequence(rdev,
1637                                                  godavari_golden_registers,
1638                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1639                 radeon_program_register_sequence(rdev,
1640                                                  kalindi_golden_common_registers,
1641                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1642                 radeon_program_register_sequence(rdev,
1643                                                  kalindi_golden_spm_registers,
1644                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1645                 break;
1646         case CHIP_KAVERI:
1647                 radeon_program_register_sequence(rdev,
1648                                                  spectre_mgcg_cgcg_init,
1649                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1650                 radeon_program_register_sequence(rdev,
1651                                                  spectre_golden_registers,
1652                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1653                 radeon_program_register_sequence(rdev,
1654                                                  spectre_golden_common_registers,
1655                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1656                 radeon_program_register_sequence(rdev,
1657                                                  spectre_golden_spm_registers,
1658                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1659                 break;
1660         case CHIP_HAWAII:
1661                 radeon_program_register_sequence(rdev,
1662                                                  hawaii_mgcg_cgcg_init,
1663                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1664                 radeon_program_register_sequence(rdev,
1665                                                  hawaii_golden_registers,
1666                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1667                 radeon_program_register_sequence(rdev,
1668                                                  hawaii_golden_common_registers,
1669                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1670                 radeon_program_register_sequence(rdev,
1671                                                  hawaii_golden_spm_registers,
1672                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1673                 break;
1674         default:
1675                 break;
1676         }
1677         mutex_unlock(&rdev->grbm_idx_mutex);
1678 }
1679
1680 /**
1681  * cik_get_xclk - get the xclk
1682  *
1683  * @rdev: radeon_device pointer
1684  *
1685  * Returns the reference clock used by the gfx engine
1686  * (CIK).
1687  */
1688 u32 cik_get_xclk(struct radeon_device *rdev)
1689 {
1690         u32 reference_clock = rdev->clock.spll.reference_freq;
1691
1692         if (rdev->flags & RADEON_IS_IGP) {
1693                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1694                         return reference_clock / 2;
1695         } else {
1696                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1697                         return reference_clock / 4;
1698         }
1699         return reference_clock;
1700 }
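/*
 * Illustrative numbers only: if the BIOS reported a 100 MHz SPLL reference,
 * a dGPU with XTALIN_DIVIDE set would report 25 MHz here, while an APU with
 * GPU_COUNTER_CLK set would report 50 MHz.
 */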
1701
1702 /**
1703  * cik_mm_rdoorbell - read a doorbell dword
1704  *
1705  * @rdev: radeon_device pointer
1706  * @index: doorbell index
1707  *
1708  * Returns the value in the doorbell aperture at the
1709  * requested doorbell index (CIK).
1710  */
1711 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1712 {
1713         if (index < rdev->doorbell.num_doorbells) {
1714                 return readl(rdev->doorbell.ptr + index);
1715         } else {
1716                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1717                 return 0;
1718         }
1719 }
1720
1721 /**
1722  * cik_mm_wdoorbell - write a doorbell dword
1723  *
1724  * @rdev: radeon_device pointer
1725  * @index: doorbell index
1726  * @v: value to write
1727  *
1728  * Writes @v to the doorbell aperture at the
1729  * requested doorbell index (CIK).
1730  */
1731 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1732 {
1733         if (index < rdev->doorbell.num_doorbells) {
1734                 writel(v, rdev->doorbell.ptr + index);
1735         } else {
1736                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1737         }
1738 }
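/*
 * These two helpers are normally reached through the RDOORBELL32()/
 * WDOORBELL32() wrappers in radeon.h; for example, the compute rings kick
 * the CP by writing their ring write pointer to their assigned doorbell
 * index.
 */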
1739
1740 #define BONAIRE_IO_MC_REGS_SIZE 36
1741
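/*
 * Legacy (non-header) MC firmware images are paired with these hardcoded
 * {MC_SEQ_IO_DEBUG index, data} tables; ci_mc_load_microcode() below writes
 * each pair before streaming the ucode itself into MC_SEQ_SUP_PGM.
 */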
1742 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1743 {
1744         {0x00000070, 0x04400000},
1745         {0x00000071, 0x80c01803},
1746         {0x00000072, 0x00004004},
1747         {0x00000073, 0x00000100},
1748         {0x00000074, 0x00ff0000},
1749         {0x00000075, 0x34000000},
1750         {0x00000076, 0x08000014},
1751         {0x00000077, 0x00cc08ec},
1752         {0x00000078, 0x00000400},
1753         {0x00000079, 0x00000000},
1754         {0x0000007a, 0x04090000},
1755         {0x0000007c, 0x00000000},
1756         {0x0000007e, 0x4408a8e8},
1757         {0x0000007f, 0x00000304},
1758         {0x00000080, 0x00000000},
1759         {0x00000082, 0x00000001},
1760         {0x00000083, 0x00000002},
1761         {0x00000084, 0xf3e4f400},
1762         {0x00000085, 0x052024e3},
1763         {0x00000087, 0x00000000},
1764         {0x00000088, 0x01000000},
1765         {0x0000008a, 0x1c0a0000},
1766         {0x0000008b, 0xff010000},
1767         {0x0000008d, 0xffffefff},
1768         {0x0000008e, 0xfff3efff},
1769         {0x0000008f, 0xfff3efbf},
1770         {0x00000092, 0xf7ffffff},
1771         {0x00000093, 0xffffff7f},
1772         {0x00000095, 0x00101101},
1773         {0x00000096, 0x00000fff},
1774         {0x00000097, 0x00116fff},
1775         {0x00000098, 0x60010000},
1776         {0x00000099, 0x10010000},
1777         {0x0000009a, 0x00006000},
1778         {0x0000009b, 0x00001000},
1779         {0x0000009f, 0x00b48000}
1780 };
1781
1782 #define HAWAII_IO_MC_REGS_SIZE 22
1783
1784 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1785 {
1786         {0x0000007d, 0x40000000},
1787         {0x0000007e, 0x40180304},
1788         {0x0000007f, 0x0000ff00},
1789         {0x00000081, 0x00000000},
1790         {0x00000083, 0x00000800},
1791         {0x00000086, 0x00000000},
1792         {0x00000087, 0x00000100},
1793         {0x00000088, 0x00020100},
1794         {0x00000089, 0x00000000},
1795         {0x0000008b, 0x00040000},
1796         {0x0000008c, 0x00000100},
1797         {0x0000008e, 0xff010000},
1798         {0x00000090, 0xffffefff},
1799         {0x00000091, 0xfff3efff},
1800         {0x00000092, 0xfff3efbf},
1801         {0x00000093, 0xf7ffffff},
1802         {0x00000094, 0xffffff7f},
1803         {0x00000095, 0x00000fff},
1804         {0x00000096, 0x00116fff},
1805         {0x00000097, 0x60010000},
1806         {0x00000098, 0x10010000},
1807         {0x0000009f, 0x00c79000}
1808 };
1809
1810
1811 /**
1812  * cik_srbm_select - select specific register instances
1813  *
1814  * @rdev: radeon_device pointer
1815  * @me: selected ME (micro engine)
1816  * @pipe: pipe
1817  * @queue: queue
1818  * @vmid: VMID
1819  *
1820  * Switches the currently active register instances.  Some
1821  * registers are instanced per VMID, others are instanced per
1822  * me/pipe/queue combination.
1823  */
1824 static void cik_srbm_select(struct radeon_device *rdev,
1825                             u32 me, u32 pipe, u32 queue, u32 vmid)
1826 {
1827         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1828                              MEID(me & 0x3) |
1829                              VMID(vmid & 0xf) |
1830                              QUEUEID(queue & 0x7));
1831         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1832 }
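/*
 * Typical usage pattern (see e.g. the compute queue setup later in this
 * file): take srbm_mutex, select the desired me/pipe/queue/vmid, program
 * the instanced registers, then select back to 0/0/0/0 before dropping the
 * lock.
 */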
1833
1834 /* ucode loading */
1835 /**
1836  * ci_mc_load_microcode - load MC ucode into the hw
1837  *
1838  * @rdev: radeon_device pointer
1839  *
1840  * Load the GDDR MC ucode into the hw (CIK).
1841  * Returns 0 on success, error on failure.
1842  */
1843 int ci_mc_load_microcode(struct radeon_device *rdev)
1844 {
1845         const __be32 *fw_data = NULL;
1846         const __le32 *new_fw_data = NULL;
1847         u32 running, tmp;
1848         u32 *io_mc_regs = NULL;
1849         const __le32 *new_io_mc_regs = NULL;
1850         int i, regs_size, ucode_size;
1851
1852         if (!rdev->mc_fw)
1853                 return -EINVAL;
1854
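        /*
         * New-style firmware images carry a header describing both the io
         * debug register pairs and the ucode (little-endian); legacy images
         * are raw big-endian blobs used together with the hardcoded
         * io_mc_regs tables above.
         */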
1855         if (rdev->new_fw) {
1856                 const struct mc_firmware_header_v1_0 *hdr =
1857                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1858
1859                 radeon_ucode_print_mc_hdr(&hdr->header);
1860
1861                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1862                 new_io_mc_regs = (const __le32 *)
1863                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1864                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1865                 new_fw_data = (const __le32 *)
1866                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1867         } else {
1868                 ucode_size = rdev->mc_fw->size / 4;
1869
1870                 switch (rdev->family) {
1871                 case CHIP_BONAIRE:
1872                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1873                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1874                         break;
1875                 case CHIP_HAWAII:
1876                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1877                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1878                         break;
1879                 default:
1880                         return -EINVAL;
1881                 }
1882                 fw_data = (const __be32 *)rdev->mc_fw->data;
1883         }
1884
1885         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1886
1887         if (running == 0) {
1892
1893                 /* reset the engine and set to writable */
1894                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1895                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1896
1897                 /* load mc io regs */
1898                 for (i = 0; i < regs_size; i++) {
1899                         if (rdev->new_fw) {
1900                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1901                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1902                         } else {
1903                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1904                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1905                         }
1906                 }
1907
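                /*
                 * What appears to be a board/memory-specific quirk: one
                 * Bonaire-family device (PCI device id 0x6649) gets a couple
                 * of extra MC sequencer writes, keyed off bits 8-15 of
                 * MC_SEQ_MISC0.
                 */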
1908                 tmp = RREG32(MC_SEQ_MISC0);
1909                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1910                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1911                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1912                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1913                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1914                 }
1915
1916                 /* load the MC ucode */
1917                 for (i = 0; i < ucode_size; i++) {
1918                         if (rdev->new_fw)
1919                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1920                         else
1921                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1922                 }
1923
1924                 /* put the engine back into the active state */
1925                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1926                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1927                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1928
1929                 /* wait for training to complete */
1930                 for (i = 0; i < rdev->usec_timeout; i++) {
1931                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1932                                 break;
1933                         udelay(1);
1934                 }
1935                 for (i = 0; i < rdev->usec_timeout; i++) {
1936                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1937                                 break;
1938                         udelay(1);
1939                 }
1943         }
1944
1945         return 0;
1946 }
1947
1948 /**
1949  * cik_init_microcode - load ucode images from disk
1950  *
1951  * @rdev: radeon_device pointer
1952  *
1953  * Use the firmware interface to load the ucode images into
1954  * the driver (not loaded into hw).
1955  * Returns 0 on success, error on failure.
1956  */
1957 static int cik_init_microcode(struct radeon_device *rdev)
1958 {
1959         const char *chip_name;
1960         const char *new_chip_name;
1961         size_t pfp_req_size, me_req_size, ce_req_size,
1962                 mec_req_size, rlc_req_size, mc_req_size = 0,
1963                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1964         char fw_name[30];
1965         int new_fw = 0;
1966         int err;
1967         int num_fw;
1968
1969         DRM_DEBUG("\n");
1970
1971         switch (rdev->family) {
1972         case CHIP_BONAIRE:
1973                 chip_name = "BONAIRE";
1974                 new_chip_name = "bonaire";
1975                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1976                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1977                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1978                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1979                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1980                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1981                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1982                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1983                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1984                 num_fw = 8;
1985                 break;
1986         case CHIP_HAWAII:
1987                 chip_name = "HAWAII";
1988                 new_chip_name = "hawaii";
1989                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1991                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1995                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1998                 num_fw = 8;
1999                 break;
2000         case CHIP_KAVERI:
2001                 chip_name = "KAVERI";
2002                 new_chip_name = "kaveri";
2003                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2005                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2008                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2009                 num_fw = 7;
2010                 break;
2011         case CHIP_KABINI:
2012                 chip_name = "KABINI";
2013                 new_chip_name = "kabini";
2014                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2016                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2019                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2020                 num_fw = 6;
2021                 break;
2022         case CHIP_MULLINS:
2023                 chip_name = "MULLINS";
2024                 new_chip_name = "mullins";
2025                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2026                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2027                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2028                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2029                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2030                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2031                 num_fw = 6;
2032                 break;
2033         default:
                BUG();
2034         }
2035
2036         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2037
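        /*
         * For each block the driver first tries the new lower-case firmware
         * name (header-based image) and only falls back to the legacy
         * upper-case name if that fails; mixing the two styles is rejected
         * at the end of this function.
         */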
2038         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2039         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2040         if (err) {
2041                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2042                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2043                 if (err)
2044                         goto out;
2045                 if (rdev->pfp_fw->size != pfp_req_size) {
2046                         printk(KERN_ERR
2047                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2048                                rdev->pfp_fw->size, fw_name);
2049                         err = -EINVAL;
2050                         goto out;
2051                 }
2052         } else {
2053                 err = radeon_ucode_validate(rdev->pfp_fw);
2054                 if (err) {
2055                         printk(KERN_ERR
2056                                "cik_fw: validation failed for firmware \"%s\"\n",
2057                                fw_name);
2058                         goto out;
2059                 } else {
2060                         new_fw++;
2061                 }
2062         }
2063
2064         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2065         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2066         if (err) {
2067                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2068                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2069                 if (err)
2070                         goto out;
2071                 if (rdev->me_fw->size != me_req_size) {
2072                         printk(KERN_ERR
2073                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074                                rdev->me_fw->size, fw_name);
2075                         err = -EINVAL;
2076                 }
2077         } else {
2078                 err = radeon_ucode_validate(rdev->me_fw);
2079                 if (err) {
2080                         printk(KERN_ERR
2081                                "cik_fw: validation failed for firmware \"%s\"\n",
2082                                fw_name);
2083                         goto out;
2084                 } else {
2085                         new_fw++;
2086                 }
2087         }
2088
2089         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2090         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2091         if (err) {
2092                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2093                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2094                 if (err)
2095                         goto out;
2096                 if (rdev->ce_fw->size != ce_req_size) {
2097                         printk(KERN_ERR
2098                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099                                rdev->ce_fw->size, fw_name);
2100                         err = -EINVAL;
2101                 }
2102         } else {
2103                 err = radeon_ucode_validate(rdev->ce_fw);
2104                 if (err) {
2105                         printk(KERN_ERR
2106                                "cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2115         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2118                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->mec_fw->size != mec_req_size) {
2122                         printk(KERN_ERR
2123                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124                                rdev->mec_fw->size, fw_name);
2125                         err = -EINVAL;
2126                 }
2127         } else {
2128                 err = radeon_ucode_validate(rdev->mec_fw);
2129                 if (err) {
2130                         printk(KERN_ERR
2131                                "cik_fw: validation failed for firmware \"%s\"\n",
2132                                fw_name);
2133                         goto out;
2134                 } else {
2135                         new_fw++;
2136                 }
2137         }
2138
2139         if (rdev->family == CHIP_KAVERI) {
2140                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2141                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2142                 if (err) {
2143                         goto out;
2144                 } else {
2145                         err = radeon_ucode_validate(rdev->mec2_fw);
2146                         if (err) {
2147                                 goto out;
2148                         } else {
2149                                 new_fw++;
2150                         }
2151                 }
2152         }
2153
2154         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2155         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2156         if (err) {
2157                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2158                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2159                 if (err)
2160                         goto out;
2161                 if (rdev->rlc_fw->size != rlc_req_size) {
2162                         printk(KERN_ERR
2163                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2164                                rdev->rlc_fw->size, fw_name);
2165                         err = -EINVAL;
2166                 }
2167         } else {
2168                 err = radeon_ucode_validate(rdev->rlc_fw);
2169                 if (err) {
2170                         printk(KERN_ERR
2171                                "cik_fw: validation failed for firmware \"%s\"\n",
2172                                fw_name);
2173                         goto out;
2174                 } else {
2175                         new_fw++;
2176                 }
2177         }
2178
2179         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2180         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2181         if (err) {
2182                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2183                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2184                 if (err)
2185                         goto out;
2186                 if (rdev->sdma_fw->size != sdma_req_size) {
2187                         printk(KERN_ERR
2188                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2189                                rdev->sdma_fw->size, fw_name);
2190                         err = -EINVAL;
2191                 }
2192         } else {
2193                 err = radeon_ucode_validate(rdev->sdma_fw);
2194                 if (err) {
2195                         printk(KERN_ERR
2196                                "cik_fw: validation failed for firmware \"%s\"\n",
2197                                fw_name);
2198                         goto out;
2199                 } else {
2200                         new_fw++;
2201                 }
2202         }
2203
2204         /* No SMC, MC ucode on APUs */
2205         if (!(rdev->flags & RADEON_IS_IGP)) {
2206                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2207                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2208                 if (err) {
2209                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2210                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2211                         if (err) {
2212                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2213                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2214                                 if (err)
2215                                         goto out;
2216                         }
2217                         if ((rdev->mc_fw->size != mc_req_size) &&
2218                             (rdev->mc_fw->size != mc2_req_size)) {
2219                                 printk(KERN_ERR
2220                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2221                                        rdev->mc_fw->size, fw_name);
2222                                 err = -EINVAL;
2223                         }
2224                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2225                 } else {
2226                         err = radeon_ucode_validate(rdev->mc_fw);
2227                         if (err) {
2228                                 printk(KERN_ERR
2229                                        "cik_fw: validation failed for firmware \"%s\"\n",
2230                                        fw_name);
2231                                 goto out;
2232                         } else {
2233                                 new_fw++;
2234                         }
2235                 }
2236
2237                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2238                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2239                 if (err) {
2240                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2241                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242                         if (err) {
2243                                 printk(KERN_ERR
2244                                        "smc: error loading firmware \"%s\"\n",
2245                                        fw_name);
2246                                 release_firmware(rdev->smc_fw);
2247                                 rdev->smc_fw = NULL;
2248                                 err = 0;
2249                         } else if (rdev->smc_fw->size != smc_req_size) {
2250                                 printk(KERN_ERR
2251                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2252                                        rdev->smc_fw->size, fw_name);
2253                                 err = -EINVAL;
2254                         }
2255                 } else {
2256                         err = radeon_ucode_validate(rdev->smc_fw);
2257                         if (err) {
2258                                 printk(KERN_ERR
2259                                        "cik_fw: validation failed for firmware \"%s\"\n",
2260                                        fw_name);
2261                                 goto out;
2262                         } else {
2263                                 new_fw++;
2264                         }
2265                 }
2266         }
2267
2268         if (new_fw == 0) {
2269                 rdev->new_fw = false;
2270         } else if (new_fw < num_fw) {
2271                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2272                 err = -EINVAL;
2273         } else {
2274                 rdev->new_fw = true;
2275         }
2276
2277 out:
2278         if (err) {
2279                 if (err != -EINVAL)
2280                         printk(KERN_ERR
2281                                "cik_cp: Failed to load firmware \"%s\"\n",
2282                                fw_name);
2283                 release_firmware(rdev->pfp_fw);
2284                 rdev->pfp_fw = NULL;
2285                 release_firmware(rdev->me_fw);
2286                 rdev->me_fw = NULL;
2287                 release_firmware(rdev->ce_fw);
2288                 rdev->ce_fw = NULL;
2289                 release_firmware(rdev->mec_fw);
2290                 rdev->mec_fw = NULL;
2291                 release_firmware(rdev->mec2_fw);
2292                 rdev->mec2_fw = NULL;
2293                 release_firmware(rdev->rlc_fw);
2294                 rdev->rlc_fw = NULL;
2295                 release_firmware(rdev->sdma_fw);
2296                 rdev->sdma_fw = NULL;
2297                 release_firmware(rdev->mc_fw);
2298                 rdev->mc_fw = NULL;
2299                 release_firmware(rdev->smc_fw);
2300                 rdev->smc_fw = NULL;
2301         }
2302         return err;
2303 }
2304
2305 /*
2306  * Core functions
2307  */
2308 /**
2309  * cik_tiling_mode_table_init - init the hw tiling table
2310  *
2311  * @rdev: radeon_device pointer
2312  *
2313  * Starting with SI, the tiling setup is done globally in a
2314  * set of 32 tiling modes.  Rather than selecting each set of
2315  * parameters per surface as on older asics, we just select
2316  * which index in the tiling table we want to use, and the
2317  * surface uses those parameters (CIK).
2318  */
2319 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2320 {
2321         const u32 num_tile_mode_states = 32;
2322         const u32 num_secondary_tile_mode_states = 16;
2323         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2324         u32 num_pipe_configs;
2325         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2326                 rdev->config.cik.max_shader_engines;
2327
2328         switch (rdev->config.cik.mem_row_size_in_kb) {
2329         case 1:
2330                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2331                 break;
2332         case 2:
2333         default:
2334                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2335                 break;
2336         case 4:
2337                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2338                 break;
2339         }
2340
2341         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2342         if (num_pipe_configs > 8)
2343                 num_pipe_configs = 16;
2344
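        /*
         * Anything above 8 tile pipes is treated as the 16-pipe layout; each
         * branch below fills the 32 GB_TILE_MODE and 16 GB_MACROTILE_MODE
         * registers with values appropriate to that pipe configuration and
         * caches them in rdev->config.cik for later surface setup.
         */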
2345         if (num_pipe_configs == 16) {
2346                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2347                         switch (reg_offset) {
2348                         case 0:
2349                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2353                                 break;
2354                         case 1:
2355                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2359                                 break;
2360                         case 2:
2361                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2365                                 break;
2366                         case 3:
2367                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2369                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2371                                 break;
2372                         case 4:
2373                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                                  TILE_SPLIT(split_equal_to_row_size));
2377                                 break;
2378                         case 5:
2379                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382                                 break;
2383                         case 6:
2384                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2388                                 break;
2389                         case 7:
2390                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2392                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                                  TILE_SPLIT(split_equal_to_row_size));
2394                                 break;
2395                         case 8:
2396                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2397                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2398                                 break;
2399                         case 9:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2401                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2403                                 break;
2404                         case 10:
2405                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2407                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409                                 break;
2410                         case 11:
2411                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2414                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                                 break;
2416                         case 12:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 13:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2426                                 break;
2427                         case 14:
2428                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                                 break;
2433                         case 16:
2434                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                                 break;
2439                         case 17:
2440                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                                 break;
2445                         case 27:
2446                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449                                 break;
2450                         case 28:
2451                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                                 break;
2456                         case 29:
2457                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2458                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2460                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461                                 break;
2462                         case 30:
2463                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2465                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467                                 break;
2468                         default:
2469                                 gb_tile_moden = 0;
2470                                 break;
2471                         }
2472                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2473                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2474                 }
2475                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2476                         switch (reg_offset) {
2477                         case 0:
2478                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2482                                 break;
2483                         case 1:
2484                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2486                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2488                                 break;
2489                         case 2:
2490                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2494                                 break;
2495                         case 3:
2496                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2500                                 break;
2501                         case 4:
2502                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2506                                 break;
2507                         case 5:
2508                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2512                                 break;
2513                         case 6:
2514                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2518                                 break;
2519                         case 8:
2520                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2524                                 break;
2525                         case 9:
2526                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2530                                 break;
2531                         case 10:
2532                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2536                                 break;
2537                         case 11:
2538                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2541                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2542                                 break;
2543                         case 12:
2544                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2548                                 break;
2549                         case 13:
2550                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2554                                 break;
2555                         case 14:
2556                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2559                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2560                                 break;
2561                         default:
2562                                 gb_tile_moden = 0;
2563                                 break;
2564                         }
2565                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2566                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2567                 }
2568         } else if (num_pipe_configs == 8) {
2569                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2570                         switch (reg_offset) {
2571                         case 0:
2572                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576                                 break;
2577                         case 1:
2578                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2582                                 break;
2583                         case 2:
2584                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588                                 break;
2589                         case 3:
2590                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2594                                 break;
2595                         case 4:
2596                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2598                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                                  TILE_SPLIT(split_equal_to_row_size));
2600                                 break;
2601                         case 5:
2602                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605                                 break;
2606                         case 6:
2607                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2611                                 break;
2612                         case 7:
2613                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2615                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                                  TILE_SPLIT(split_equal_to_row_size));
2617                                 break;
2618                         case 8:
2619                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2620                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2621                                 break;
2622                         case 9:
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2626                                 break;
2627                         case 10:
2628                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632                                 break;
2633                         case 11:
2634                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2637                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2638                                 break;
2639                         case 12:
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                 break;
2645                         case 13:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2649                                 break;
2650                         case 14:
2651                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655                                 break;
2656                         case 16:
2657                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2658                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2659                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2660                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                                 break;
2662                         case 17:
2663                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2664                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667                                 break;
2668                         case 27:
2669                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2670                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2672                                 break;
2673                         case 28:
2674                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678                                 break;
2679                         case 29:
2680                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2682                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684                                 break;
2685                         case 30:
2686                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2687                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2690                                 break;
2691                         default:
2692                                 gb_tile_moden = 0;
2693                                 break;
2694                         }
2695                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2696                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2697                 }
2698                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2699                         switch (reg_offset) {
2700                         case 0:
2701                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2705                                 break;
2706                         case 1:
2707                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2711                                 break;
2712                         case 2:
2713                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2716                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2717                                 break;
2718                         case 3:
2719                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2722                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2723                                 break;
2724                         case 4:
2725                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2729                                 break;
2730                         case 5:
2731                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2734                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2735                                 break;
2736                         case 6:
2737                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2740                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2741                                 break;
2742                         case 8:
2743                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2747                                 break;
2748                         case 9:
2749                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2751                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2752                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2753                                 break;
2754                         case 10:
2755                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2759                                 break;
2760                         case 11:
2761                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2765                                 break;
2766                         case 12:
2767                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2771                                 break;
2772                         case 13:
2773                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2774                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2775                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2776                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2777                                 break;
2778                         case 14:
2779                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2783                                 break;
2784                         default:
2785                                 gb_tile_moden = 0;
2786                                 break;
2787                         }
2788                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2789                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2790                 }
2791         } else if (num_pipe_configs == 4) {
2792                 if (num_rbs == 4) {
2793                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2794                                 switch (reg_offset) {
2795                                 case 0:
2796                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800                                         break;
2801                                 case 1:
2802                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2806                                         break;
2807                                 case 2:
2808                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812                                         break;
2813                                 case 3:
2814                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2818                                         break;
2819                                 case 4:
2820                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2822                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                                          TILE_SPLIT(split_equal_to_row_size));
2824                                         break;
2825                                 case 5:
2826                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829                                         break;
2830                                 case 6:
2831                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2835                                         break;
2836                                 case 7:
2837                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2838                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2839                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840                                                          TILE_SPLIT(split_equal_to_row_size));
2841                                         break;
2842                                 case 8:
2843                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2844                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2845                                         break;
2846                                 case 9:
2847                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2850                                         break;
2851                                 case 10:
2852                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856                                         break;
2857                                 case 11:
2858                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2859                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2860                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2861                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862                                         break;
2863                                 case 12:
2864                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868                                         break;
2869                                 case 13:
2870                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2871                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2873                                         break;
2874                                 case 14:
2875                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2877                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879                                         break;
2880                                 case 16:
2881                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2882                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2883                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2884                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885                                         break;
2886                                 case 17:
2887                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2888                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2889                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891                                         break;
2892                                 case 27:
2893                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2894                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2896                                         break;
2897                                 case 28:
2898                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902                                         break;
2903                                 case 29:
2904                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908                                         break;
2909                                 case 30:
2910                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914                                         break;
2915                                 default:
2916                                         gb_tile_moden = 0;
2917                                         break;
2918                                 }
2919                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2920                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2921                         }
2922                 } else if (num_rbs < 4) {
2923                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2924                                 switch (reg_offset) {
2925                                 case 0:
2926                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2930                                         break;
2931                                 case 1:
2932                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2936                                         break;
2937                                 case 2:
2938                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2940                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2942                                         break;
2943                                 case 3:
2944                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2946                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2947                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2948                                         break;
2949                                 case 4:
2950                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2951                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2952                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2953                                                          TILE_SPLIT(split_equal_to_row_size));
2954                                         break;
2955                                 case 5:
2956                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2959                                         break;
2960                                 case 6:
2961                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2962                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2963                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2964                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2965                                         break;
2966                                 case 7:
2967                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2969                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2970                                                          TILE_SPLIT(split_equal_to_row_size));
2971                                         break;
2972                                 case 8:
2973                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2974                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2975                                         break;
2976                                 case 9:
2977                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2980                                         break;
2981                                 case 10:
2982                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2984                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986                                         break;
2987                                 case 11:
2988                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2989                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2990                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2992                                         break;
2993                                 case 12:
2994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998                                         break;
2999                                 case 13:
3000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3001                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3003                                         break;
3004                                 case 14:
3005                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009                                         break;
3010                                 case 16:
3011                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3013                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3014                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015                                         break;
3016                                 case 17:
3017                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3018                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3020                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                                         break;
3022                                 case 27:
3023                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3026                                         break;
3027                                 case 28:
3028                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032                                         break;
3033                                 case 29:
3034                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3035                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3036                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3037                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038                                         break;
3039                                 case 30:
3040                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3041                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3042                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3043                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3044                                         break;
3045                                 default:
3046                                         gb_tile_moden = 0;
3047                                         break;
3048                                 }
3049                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3050                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3051                         }
3052                 }
3053                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3054                         switch (reg_offset) {
3055                         case 0:
3056                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3060                                 break;
3061                         case 1:
3062                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3066                                 break;
3067                         case 2:
3068                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3072                                 break;
3073                         case 3:
3074                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3078                                 break;
3079                         case 4:
3080                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3084                                 break;
3085                         case 5:
3086                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3089                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3090                                 break;
3091                         case 6:
3092                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3096                                 break;
3097                         case 8:
3098                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3099                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3100                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3101                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3102                                 break;
3103                         case 9:
3104                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3105                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3106                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3107                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3108                                 break;
3109                         case 10:
3110                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3112                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3113                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3114                                 break;
3115                         case 11:
3116                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3118                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3119                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3120                                 break;
3121                         case 12:
3122                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3124                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3125                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3126                                 break;
3127                         case 13:
3128                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3130                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3131                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3132                                 break;
3133                         case 14:
3134                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3136                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3137                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3138                                 break;
3139                         default:
3140                                 gb_tile_moden = 0;
3141                                 break;
3142                         }
3143                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3144                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3145                 }
3146         } else if (num_pipe_configs == 2) {
3147                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3148                         switch (reg_offset) {
3149                         case 0:
3150                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3154                                 break;
3155                         case 1:
3156                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3157                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3160                                 break;
3161                         case 2:
3162                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3164                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3166                                 break;
3167                         case 3:
3168                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3170                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3172                                 break;
3173                         case 4:
3174                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3176                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                                  TILE_SPLIT(split_equal_to_row_size));
3178                                 break;
3179                         case 5:
3180                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183                                 break;
3184                         case 6:
3185                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3186                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3187                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3188                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3189                                 break;
3190                         case 7:
3191                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3192                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3193                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                                  TILE_SPLIT(split_equal_to_row_size));
3195                                 break;
3196                         case 8:
3197                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198                                                  PIPE_CONFIG(ADDR_SURF_P2));
3199                                 break;
3200                         case 9:
3201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3202                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203                                                  PIPE_CONFIG(ADDR_SURF_P2));
3204                                 break;
3205                         case 10:
3206                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3207                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3208                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210                                 break;
3211                         case 11:
3212                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3213                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3214                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3215                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216                                 break;
3217                         case 12:
3218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3220                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                                 break;
3223                         case 13:
3224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3225                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3227                                 break;
3228                         case 14:
3229                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3232                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233                                 break;
3234                         case 16:
3235                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3237                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3238                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3239                                 break;
3240                         case 17:
3241                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3242                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3244                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245                                 break;
3246                         case 27:
3247                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3248                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3249                                                  PIPE_CONFIG(ADDR_SURF_P2));
3250                                 break;
3251                         case 28:
3252                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256                                 break;
3257                         case 29:
3258                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262                                 break;
3263                         case 30:
3264                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3265                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3267                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268                                 break;
3269                         default:
3270                                 gb_tile_moden = 0;
3271                                 break;
3272                         }
3273                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3274                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3275                 }
3276                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3277                         switch (reg_offset) {
3278                         case 0:
3279                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3283                                 break;
3284                         case 1:
3285                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3289                                 break;
3290                         case 2:
3291                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3295                                 break;
3296                         case 3:
3297                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3301                                 break;
3302                         case 4:
3303                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3306                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3307                                 break;
3308                         case 5:
3309                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3313                                 break;
3314                         case 6:
3315                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3316                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3317                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3318                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3319                                 break;
3320                         case 8:
3321                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3323                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3325                                 break;
3326                         case 9:
3327                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3328                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3331                                 break;
3332                         case 10:
3333                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3334                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3335                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3337                                 break;
3338                         case 11:
3339                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3340                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3341                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3343                                 break;
3344                         case 12:
3345                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3347                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3349                                 break;
3350                         case 13:
3351                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3355                                 break;
3356                         case 14:
3357                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3361                                 break;
3362                         default:
3363                                 gb_tile_moden = 0;
3364                                 break;
3365                         }
3366                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3367                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3368                 }
3369         } else
3370                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3371 }
3372
3373 /**
3374  * cik_select_se_sh - select which SE, SH to address
3375  *
3376  * @rdev: radeon_device pointer
3377  * @se_num: shader engine to address
3378  * @sh_num: sh block to address
3379  *
3380  * Select which SE, SH combinations to address. Certain
3381  * registers are instanced per SE or SH.  0xffffffff means
3382  * broadcast to all SEs or SHs (CIK).
3383  */
3384 static void cik_select_se_sh(struct radeon_device *rdev,
3385                              u32 se_num, u32 sh_num)
3386 {
3387         u32 data = INSTANCE_BROADCAST_WRITES;
3388
3389         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3390                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3391         else if (se_num == 0xffffffff)
3392                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3393         else if (sh_num == 0xffffffff)
3394                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3395         else
3396                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3397         WREG32(GRBM_GFX_INDEX, data);
3398 }
3399
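/*
 * Editor's illustrative note (not part of the driver): the usual pattern,
 * as cik_setup_rb() below does, is to select one SE/SH instance, access
 * the instanced registers, and then restore broadcast mode so later
 * writes reach every instance again:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... read/write per-instance registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */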
3400 /**
3401  * cik_create_bitmask - create a bitmask
3402  *
3403  * @bit_width: length of the mask
3404  *
3405  * create a variable length bit mask (CIK).
3406  * Returns the bitmask.
3407  */
3408 static u32 cik_create_bitmask(u32 bit_width)
3409 {
3410         u32 i, mask = 0;
3411
3412         for (i = 0; i < bit_width; i++) {
3413                 mask <<= 1;
3414                 mask |= 1;
3415         }
3416         return mask;
3417 }
3418
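/*
 * Editor's illustrative sketch (not part of the driver): for bit_width
 * below 32 the loop above is equivalent to the closed form below, e.g.
 * cik_create_bitmask(4) == 0xf.  The hypothetical helper is shown only
 * to clarify what the loop computes.
 */
static inline u32 cik_create_bitmask_example(u32 bit_width)
{
	/* (1 << bit_width) - 1 sets the low bit_width bits */
	return (bit_width < 32) ? ((1u << bit_width) - 1) : 0xffffffffu;
}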
3419 /**
3420  * cik_get_rb_disabled - computes the mask of disabled RBs
3421  *
3422  * @rdev: radeon_device pointer
3423  * @max_rb_num_per_se: max RBs (render backends) per SE for
3424  *                     the asic
3425  * @sh_per_se: number of SH blocks per SE for the asic
3426  *
3427  * Calculates the bitmask of disabled RBs (CIK).
3428  * Returns the disabled RB bitmask.
3429  */
3430 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3431                               u32 max_rb_num_per_se,
3432                               u32 sh_per_se)
3433 {
3434         u32 data, mask;
3435
3436         data = RREG32(CC_RB_BACKEND_DISABLE);
3437         if (data & 1)
3438                 data &= BACKEND_DISABLE_MASK;
3439         else
3440                 data = 0;
3441         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3442
3443         data >>= BACKEND_DISABLE_SHIFT;
3444
3445         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3446
3447         return data & mask;
3448 }
3449
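/*
 * Editor's illustrative note (not part of the driver): the hard-fused
 * (CC_RB_BACKEND_DISABLE) and driver/user (GC_USER_RB_BACKEND_DISABLE)
 * disable bits are OR'd together, shifted down to bit 0 and clipped to
 * the RBs owned by a single SH.  For example, with 4 render backends
 * per SE and 1 SH per SE the mask is cik_create_bitmask(4 / 1) = 0xf.
 */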
3450 /**
3451  * cik_setup_rb - setup the RBs on the asic
3452  *
3453  * @rdev: radeon_device pointer
3454  * @se_num: number of SEs (shader engines) for the asic
3455  * @sh_per_se: number of SH blocks per SE for the asic
3456  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3457  *
3458  * Configures per-SE/SH RB registers (CIK).
3459  */
3460 static void cik_setup_rb(struct radeon_device *rdev,
3461                          u32 se_num, u32 sh_per_se,
3462                          u32 max_rb_num_per_se)
3463 {
3464         int i, j;
3465         u32 data, mask;
3466         u32 disabled_rbs = 0;
3467         u32 enabled_rbs = 0;
3468
3469         mutex_lock(&rdev->grbm_idx_mutex);
3470         for (i = 0; i < se_num; i++) {
3471                 for (j = 0; j < sh_per_se; j++) {
3472                         cik_select_se_sh(rdev, i, j);
3473                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3474                         if (rdev->family == CHIP_HAWAII)
3475                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3476                         else
3477                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3478                 }
3479         }
3480         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3481         mutex_unlock(&rdev->grbm_idx_mutex);
3482
3483         mask = 1;
3484         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3485                 if (!(disabled_rbs & mask))
3486                         enabled_rbs |= mask;
3487                 mask <<= 1;
3488         }
3489
3490         rdev->config.cik.backend_enable_mask = enabled_rbs;
3491
3492         mutex_lock(&rdev->grbm_idx_mutex);
3493         for (i = 0; i < se_num; i++) {
3494                 cik_select_se_sh(rdev, i, 0xffffffff);
3495                 data = 0;
3496                 for (j = 0; j < sh_per_se; j++) {
3497                         switch (enabled_rbs & 3) {
3498                         case 0:
3499                                 if (j == 0)
3500                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3501                                 else
3502                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3503                                 break;
3504                         case 1:
3505                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3506                                 break;
3507                         case 2:
3508                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3509                                 break;
3510                         case 3:
3511                         default:
3512                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3513                                 break;
3514                         }
3515                         enabled_rbs >>= 2;
3516                 }
3517                 WREG32(PA_SC_RASTER_CONFIG, data);
3518         }
3519         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3520         mutex_unlock(&rdev->grbm_idx_mutex);
3521 }
3522
3523 /**
3524  * cik_gpu_init - setup the 3D engine
3525  *
3526  * @rdev: radeon_device pointer
3527  *
3528  * Configures the 3D engine and tiling configuration
3529  * registers so that the 3D engine is usable.
3530  */
3531 static void cik_gpu_init(struct radeon_device *rdev)
3532 {
3533         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3534         u32 mc_shared_chmap, mc_arb_ramcfg;
3535         u32 hdp_host_path_cntl;
3536         u32 tmp;
3537         int i, j;
3538
3539         switch (rdev->family) {
3540         case CHIP_BONAIRE:
3541                 rdev->config.cik.max_shader_engines = 2;
3542                 rdev->config.cik.max_tile_pipes = 4;
3543                 rdev->config.cik.max_cu_per_sh = 7;
3544                 rdev->config.cik.max_sh_per_se = 1;
3545                 rdev->config.cik.max_backends_per_se = 2;
3546                 rdev->config.cik.max_texture_channel_caches = 4;
3547                 rdev->config.cik.max_gprs = 256;
3548                 rdev->config.cik.max_gs_threads = 32;
3549                 rdev->config.cik.max_hw_contexts = 8;
3550
3551                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3552                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3553                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3554                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3555                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3556                 break;
3557         case CHIP_HAWAII:
3558                 rdev->config.cik.max_shader_engines = 4;
3559                 rdev->config.cik.max_tile_pipes = 16;
3560                 rdev->config.cik.max_cu_per_sh = 11;
3561                 rdev->config.cik.max_sh_per_se = 1;
3562                 rdev->config.cik.max_backends_per_se = 4;
3563                 rdev->config.cik.max_texture_channel_caches = 16;
3564                 rdev->config.cik.max_gprs = 256;
3565                 rdev->config.cik.max_gs_threads = 32;
3566                 rdev->config.cik.max_hw_contexts = 8;
3567
3568                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3569                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3570                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3571                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3572                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3573                 break;
3574         case CHIP_KAVERI:
3575                 rdev->config.cik.max_shader_engines = 1;
3576                 rdev->config.cik.max_tile_pipes = 4;
3577                 if ((rdev->pdev->device == 0x1304) ||
3578                     (rdev->pdev->device == 0x1305) ||
3579                     (rdev->pdev->device == 0x130C) ||
3580                     (rdev->pdev->device == 0x130F) ||
3581                     (rdev->pdev->device == 0x1310) ||
3582                     (rdev->pdev->device == 0x1311) ||
3583                     (rdev->pdev->device == 0x131C)) {
3584                         rdev->config.cik.max_cu_per_sh = 8;
3585                         rdev->config.cik.max_backends_per_se = 2;
3586                 } else if ((rdev->pdev->device == 0x1309) ||
3587                            (rdev->pdev->device == 0x130A) ||
3588                            (rdev->pdev->device == 0x130D) ||
3589                            (rdev->pdev->device == 0x1313) ||
3590                            (rdev->pdev->device == 0x131D)) {
3591                         rdev->config.cik.max_cu_per_sh = 6;
3592                         rdev->config.cik.max_backends_per_se = 2;
3593                 } else if ((rdev->pdev->device == 0x1306) ||
3594                            (rdev->pdev->device == 0x1307) ||
3595                            (rdev->pdev->device == 0x130B) ||
3596                            (rdev->pdev->device == 0x130E) ||
3597                            (rdev->pdev->device == 0x1315) ||
3598                            (rdev->pdev->device == 0x1318) ||
3599                            (rdev->pdev->device == 0x131B)) {
3600                         rdev->config.cik.max_cu_per_sh = 4;
3601                         rdev->config.cik.max_backends_per_se = 1;
3602                 } else {
3603                         rdev->config.cik.max_cu_per_sh = 3;
3604                         rdev->config.cik.max_backends_per_se = 1;
3605                 }
3606                 rdev->config.cik.max_sh_per_se = 1;
3607                 rdev->config.cik.max_texture_channel_caches = 4;
3608                 rdev->config.cik.max_gprs = 256;
3609                 rdev->config.cik.max_gs_threads = 16;
3610                 rdev->config.cik.max_hw_contexts = 8;
3611
3612                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3613                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3614                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3615                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3616                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3617                 break;
3618         case CHIP_KABINI:
3619         case CHIP_MULLINS:
3620         default:
3621                 rdev->config.cik.max_shader_engines = 1;
3622                 rdev->config.cik.max_tile_pipes = 2;
3623                 rdev->config.cik.max_cu_per_sh = 2;
3624                 rdev->config.cik.max_sh_per_se = 1;
3625                 rdev->config.cik.max_backends_per_se = 1;
3626                 rdev->config.cik.max_texture_channel_caches = 2;
3627                 rdev->config.cik.max_gprs = 256;
3628                 rdev->config.cik.max_gs_threads = 16;
3629                 rdev->config.cik.max_hw_contexts = 8;
3630
3631                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3632                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3633                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3634                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3635                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3636                 break;
3637         }
3638
3639         /* Initialize HDP */
3640         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3641                 WREG32((0x2c14 + j), 0x00000000);
3642                 WREG32((0x2c18 + j), 0x00000000);
3643                 WREG32((0x2c1c + j), 0x00000000);
3644                 WREG32((0x2c20 + j), 0x00000000);
3645                 WREG32((0x2c24 + j), 0x00000000);
3646         }
3647
3648         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3649         WREG32(SRBM_INT_CNTL, 0x1);
3650         WREG32(SRBM_INT_ACK, 0x1);
3651
3652         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3653
3654         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3655         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3656
3657         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3658         rdev->config.cik.mem_max_burst_length_bytes = 256;
3659         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3660         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3661         if (rdev->config.cik.mem_row_size_in_kb > 4)
3662                 rdev->config.cik.mem_row_size_in_kb = 4;
3663         /* XXX use MC settings? */
3664         rdev->config.cik.shader_engine_tile_size = 32;
3665         rdev->config.cik.num_gpus = 1;
3666         rdev->config.cik.multi_gpu_tile_size = 64;
3667
3668         /* fix up row size */
3669         gb_addr_config &= ~ROW_SIZE_MASK;
3670         switch (rdev->config.cik.mem_row_size_in_kb) {
3671         case 1:
3672         default:
3673                 gb_addr_config |= ROW_SIZE(0);
3674                 break;
3675         case 2:
3676                 gb_addr_config |= ROW_SIZE(1);
3677                 break;
3678         case 4:
3679                 gb_addr_config |= ROW_SIZE(2);
3680                 break;
3681         }
3682
3683         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3684          * not have bank info, so create a custom tiling dword.
3685          * bits 3:0   num_pipes
3686          * bits 7:4   num_banks
3687          * bits 11:8  group_size
3688          * bits 15:12 row_size
3689          */
3690         rdev->config.cik.tile_config = 0;
3691         switch (rdev->config.cik.num_tile_pipes) {
3692         case 1:
3693                 rdev->config.cik.tile_config |= (0 << 0);
3694                 break;
3695         case 2:
3696                 rdev->config.cik.tile_config |= (1 << 0);
3697                 break;
3698         case 4:
3699                 rdev->config.cik.tile_config |= (2 << 0);
3700                 break;
3701         case 8:
3702         default:
3703                 /* XXX what about 12? */
3704                 rdev->config.cik.tile_config |= (3 << 0);
3705                 break;
3706         }
3707         rdev->config.cik.tile_config |=
3708                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3709         rdev->config.cik.tile_config |=
3710                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3711         rdev->config.cik.tile_config |=
3712                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3713
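        /*
         * Editor's illustrative note (not part of the driver): consumers
         * can unpack this dword with plain shifts and masks, e.g.
         *   pipes  = tile_config & 0xf;
         *   banks  = (tile_config >> 4) & 0xf;
         *   group  = (tile_config >> 8) & 0xf;
         *   rows   = (tile_config >> 12) & 0xf;
         * where each field holds the encoded value written above, not a
         * raw count.
         */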
3714         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3715         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3716         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3717         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3718         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3719         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3720         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3721         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3722
3723         cik_tiling_mode_table_init(rdev);
3724
3725         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3726                      rdev->config.cik.max_sh_per_se,
3727                      rdev->config.cik.max_backends_per_se);
3728
3729         rdev->config.cik.active_cus = 0;
3730         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3731                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3732                         rdev->config.cik.active_cus +=
3733                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3734                 }
3735         }
3736
3737         /* set HW defaults for 3D engine */
3738         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3739
3740         mutex_lock(&rdev->grbm_idx_mutex);
3741         /*
3742          * making sure that the following register writes will be broadcast
3743          * to all the shaders
3744          */
3745         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3746         WREG32(SX_DEBUG_1, 0x20);
3747
3748         WREG32(TA_CNTL_AUX, 0x00010000);
3749
3750         tmp = RREG32(SPI_CONFIG_CNTL);
3751         tmp |= 0x03000000;
3752         WREG32(SPI_CONFIG_CNTL, tmp);
3753
3754         WREG32(SQ_CONFIG, 1);
3755
3756         WREG32(DB_DEBUG, 0);
3757
3758         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3759         tmp |= 0x00000400;
3760         WREG32(DB_DEBUG2, tmp);
3761
3762         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3763         tmp |= 0x00020200;
3764         WREG32(DB_DEBUG3, tmp);
3765
3766         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3767         tmp |= 0x00018208;
3768         WREG32(CB_HW_CONTROL, tmp);
3769
3770         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3771
3772         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3773                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3774                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3775                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3776
3777         WREG32(VGT_NUM_INSTANCES, 1);
3778
3779         WREG32(CP_PERFMON_CNTL, 0);
3780
3781         WREG32(SQ_CONFIG, 0);
3782
3783         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3784                                           FORCE_EOV_MAX_REZ_CNT(255)));
3785
3786         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3787                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3788
3789         WREG32(VGT_GS_VERTEX_REUSE, 16);
3790         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3791
3792         tmp = RREG32(HDP_MISC_CNTL);
3793         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3794         WREG32(HDP_MISC_CNTL, tmp);
3795
3796         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3797         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3798
3799         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3800         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3801         mutex_unlock(&rdev->grbm_idx_mutex);
3802
3803         udelay(50);
3804 }
3805
3806 /*
3807  * GPU scratch registers helpers function.
3808  */
3809 /**
3810  * cik_scratch_init - setup driver info for CP scratch regs
3811  *
3812  * @rdev: radeon_device pointer
3813  *
3814  * Set up the number and offset of the CP scratch registers.
3815  * NOTE: use of CP scratch registers is a legacy interface and
3816  * is not used by default on newer asics (r6xx+).  On newer asics,
3817  * memory buffers are used for fences rather than scratch regs.
3818  */
3819 static void cik_scratch_init(struct radeon_device *rdev)
3820 {
3821         int i;
3822
3823         rdev->scratch.num_reg = 7;
3824         rdev->scratch.reg_base = SCRATCH_REG0;
3825         for (i = 0; i < rdev->scratch.num_reg; i++) {
3826                 rdev->scratch.free[i] = true;
3827                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3828         }
3829 }
3830
3831 /**
3832  * cik_ring_test - basic gfx ring test
3833  *
3834  * @rdev: radeon_device pointer
3835  * @ring: radeon_ring structure holding ring information
3836  *
3837  * Allocate a scratch register and write to it using the gfx ring (CIK).
3838  * Provides a basic gfx ring test to verify that the ring is working.
3839  * Used by cik_cp_gfx_resume().
3840  * Returns 0 on success, error on failure.
3841  */
3842 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3843 {
3844         uint32_t scratch;
3845         uint32_t tmp = 0;
3846         unsigned i;
3847         int r;
3848
3849         r = radeon_scratch_get(rdev, &scratch);
3850         if (r) {
3851                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3852                 return r;
3853         }
3854         WREG32(scratch, 0xCAFEDEAD);
3855         r = radeon_ring_lock(rdev, ring, 3);
3856         if (r) {
3857                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3858                 radeon_scratch_free(rdev, scratch);
3859                 return r;
3860         }
3861         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3862         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3863         radeon_ring_write(ring, 0xDEADBEEF);
3864         radeon_ring_unlock_commit(rdev, ring, false);
3865
3866         for (i = 0; i < rdev->usec_timeout; i++) {
3867                 tmp = RREG32(scratch);
3868                 if (tmp == 0xDEADBEEF)
3869                         break;
3870                 DRM_UDELAY(1);
3871         }
3872         if (i < rdev->usec_timeout) {
3873                 DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3874         } else {
3875                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3876                           ring->idx, scratch, tmp);
3877                 r = -EINVAL;
3878         }
3879         radeon_scratch_free(rdev, scratch);
3880         return r;
3881 }
3882
3883 /**
3884  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3885  *
3886  * @rdev: radeon_device pointer
3887  * @ridx: radeon ring index
3888  *
3889  * Emits an hdp flush on the cp.
3890  */
3891 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3892                                        int ridx)
3893 {
3894         struct radeon_ring *ring = &rdev->ring[ridx];
3895         u32 ref_and_mask;
3896
3897         switch (ring->idx) {
3898         case CAYMAN_RING_TYPE_CP1_INDEX:
3899         case CAYMAN_RING_TYPE_CP2_INDEX:
3900         default:
3901                 switch (ring->me) {
3902                 case 0:
3903                         ref_and_mask = CP2 << ring->pipe;
3904                         break;
3905                 case 1:
3906                         ref_and_mask = CP6 << ring->pipe;
3907                         break;
3908                 default:
3909                         return;
3910                 }
3911                 break;
3912         case RADEON_RING_TYPE_GFX_INDEX:
3913                 ref_and_mask = CP0;
3914                 break;
3915         }
3916
3917         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3918         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3919                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3920                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3921         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3922         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3923         radeon_ring_write(ring, ref_and_mask);
3924         radeon_ring_write(ring, ref_and_mask);
3925         radeon_ring_write(ring, 0x20); /* poll interval */
3926 }
3927
3928 /**
3929  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3930  *
3931  * @rdev: radeon_device pointer
3932  * @fence: radeon fence object
3933  *
3934  * Emits a fence sequence number on the gfx ring and flushes
3935  * GPU caches.
3936  */
3937 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3938                              struct radeon_fence *fence)
3939 {
3940         struct radeon_ring *ring = &rdev->ring[fence->ring];
3941         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3942
3943         /* Workaround for cache flush problems. First send a dummy EOP
3944          * event down the pipe with seq one below.
3945          */
3946         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3947         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3948                                  EOP_TC_ACTION_EN |
3949                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3950                                  EVENT_INDEX(5)));
3951         radeon_ring_write(ring, addr & 0xfffffffc);
3952         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3953                                 DATA_SEL(1) | INT_SEL(0));
3954         radeon_ring_write(ring, fence->seq - 1);
3955         radeon_ring_write(ring, 0);
3956
3957         /* Then send the real EOP event down the pipe. */
3958         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3959         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3960                                  EOP_TC_ACTION_EN |
3961                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3962                                  EVENT_INDEX(5)));
3963         radeon_ring_write(ring, addr & 0xfffffffc);
3964         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3965         radeon_ring_write(ring, fence->seq);
3966         radeon_ring_write(ring, 0);
3967 }
3968
3969 /**
3970  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3971  *
3972  * @rdev: radeon_device pointer
3973  * @fence: radeon fence object
3974  *
3975  * Emits a fence sequence number on the compute ring and flushes
3976  * GPU caches.
3977  */
3978 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3979                                  struct radeon_fence *fence)
3980 {
3981         struct radeon_ring *ring = &rdev->ring[fence->ring];
3982         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3983
3984         /* RELEASE_MEM - flush caches, send int */
3985         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3986         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3987                                  EOP_TC_ACTION_EN |
3988                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3989                                  EVENT_INDEX(5)));
3990         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3991         radeon_ring_write(ring, addr & 0xfffffffc);
3992         radeon_ring_write(ring, upper_32_bits(addr));
3993         radeon_ring_write(ring, fence->seq);
3994         radeon_ring_write(ring, 0);
3995 }
3996
3997 /**
3998  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3999  *
4000  * @rdev: radeon_device pointer
4001  * @ring: radeon ring buffer object
4002  * @semaphore: radeon semaphore object
4003  * @emit_wait: Is this a semaphore wait?
4004  *
4005  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4006  * from running ahead of semaphore waits.
4007  */
4008 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4009                              struct radeon_ring *ring,
4010                              struct radeon_semaphore *semaphore,
4011                              bool emit_wait)
4012 {
4013         uint64_t addr = semaphore->gpu_addr;
4014         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4015
4016         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4017         radeon_ring_write(ring, lower_32_bits(addr));
4018         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4019
4020         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4021                 /* Prevent the PFP from running ahead of the semaphore wait */
4022                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4023                 radeon_ring_write(ring, 0x0);
4024         }
4025
4026         return true;
4027 }
4028
4029 /**
4030  * cik_copy_cpdma - copy pages using the CP DMA engine
4031  *
4032  * @rdev: radeon_device pointer
4033  * @src_offset: src GPU address
4034  * @dst_offset: dst GPU address
4035  * @num_gpu_pages: number of GPU pages to xfer
4036  * @resv: reservation object to sync to
4037  *
4038  * Copy GPU pages using the CP DMA engine (CIK+).
4039  * Used by the radeon ttm implementation to move pages if
4040  * registered as the asic copy callback.
4041  */
4042 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4043                                     uint64_t src_offset, uint64_t dst_offset,
4044                                     unsigned num_gpu_pages,
4045                                     struct reservation_object *resv)
4046 {
4047         struct radeon_fence *fence;
4048         struct radeon_sync sync;
4049         int ring_index = rdev->asic->copy.blit_ring_index;
4050         struct radeon_ring *ring = &rdev->ring[ring_index];
4051         u32 size_in_bytes, cur_size_in_bytes, control;
4052         int i, num_loops;
4053         int r = 0;
4054
4055         radeon_sync_create(&sync);
4056
4057         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4058         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4059         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4060         if (r) {
4061                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4062                 radeon_sync_free(rdev, &sync, NULL);
4063                 return ERR_PTR(r);
4064         }
4065
4066         radeon_sync_resv(rdev, &sync, resv, false);
4067         radeon_sync_rings(rdev, &sync, ring->idx);
4068
4069         for (i = 0; i < num_loops; i++) {
4070                 cur_size_in_bytes = size_in_bytes;
4071                 if (cur_size_in_bytes > 0x1fffff)
4072                         cur_size_in_bytes = 0x1fffff;
4073                 size_in_bytes -= cur_size_in_bytes;
4074                 control = 0;
4075                 if (size_in_bytes == 0)
4076                         control |= PACKET3_DMA_DATA_CP_SYNC;
4077                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4078                 radeon_ring_write(ring, control);
4079                 radeon_ring_write(ring, lower_32_bits(src_offset));
4080                 radeon_ring_write(ring, upper_32_bits(src_offset));
4081                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4082                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4083                 radeon_ring_write(ring, cur_size_in_bytes);
4084                 src_offset += cur_size_in_bytes;
4085                 dst_offset += cur_size_in_bytes;
4086         }
4087
4088         r = radeon_fence_emit(rdev, &fence, ring->idx);
4089         if (r) {
4090                 radeon_ring_unlock_undo(rdev, ring);
4091                 radeon_sync_free(rdev, &sync, NULL);
4092                 return ERR_PTR(r);
4093         }
4094
4095         radeon_ring_unlock_commit(rdev, ring, false);
4096         radeon_sync_free(rdev, &sync, fence);
4097
4098         return fence;
4099 }
4100
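/*
 * Editor's illustrative note (not part of the driver): each loop iteration
 * above emits exactly 7 dwords (the DMA_DATA header plus control, two src
 * address dwords, two dst address dwords and the byte count), and each
 * chunk moves at most 0x1fffff bytes.  Copying 2 MiB (2097152 bytes, one
 * byte over the 0x1fffff limit) therefore issues two DMA_DATA packets and
 * locks the ring for 2 * 7 + 18 = 32 dwords; this sketch assumes the extra
 * 18 dwords are headroom for the sync and fence packets emitted around
 * the copy.
 */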
4101 /*
4102  * IB stuff
4103  */
4104 /**
4105  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ib: radeon indirect buffer object
4109  *
4110  * Emits a DE (drawing engine) or CE (constant engine) IB
4111  * on the gfx ring.  IBs are usually generated by userspace
4112  * acceleration drivers and submitted to the kernel for
4113  * scheduling on the ring.  This function schedules the IB
4114  * on the gfx ring for execution by the GPU.
4115  */
4116 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4117 {
4118         struct radeon_ring *ring = &rdev->ring[ib->ring];
4119         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4120         u32 header, control = INDIRECT_BUFFER_VALID;
4121
4122         if (ib->is_const_ib) {
4123                 /* set switch buffer packet before const IB */
4124                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4125                 radeon_ring_write(ring, 0);
4126
4127                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4128         } else {
4129                 u32 next_rptr;
4130                 if (ring->rptr_save_reg) {
4131                         next_rptr = ring->wptr + 3 + 4;
4132                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4133                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4134                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4135                         radeon_ring_write(ring, next_rptr);
4136                 } else if (rdev->wb.enabled) {
4137                         next_rptr = ring->wptr + 5 + 4;
4138                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4139                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4140                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4141                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4142                         radeon_ring_write(ring, next_rptr);
4143                 }
4144
4145                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4146         }
4147
4148         control |= ib->length_dw | (vm_id << 24);
4149
4150         radeon_ring_write(ring, header);
4151         radeon_ring_write(ring,
4152 #ifdef __BIG_ENDIAN
4153                           (2 << 0) |
4154 #endif
4155                           (ib->gpu_addr & 0xFFFFFFFC));
4156         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4157         radeon_ring_write(ring, control);
4158 }
4159
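/*
 * Editor's illustrative note (not part of the driver): the INDIRECT_BUFFER
 * packet emitted above is four dwords - the header, the IB address low
 * bits (256-byte aligned, with a swap mode in the low bits on big-endian),
 * the IB address high bits, and a control word combining
 * INDIRECT_BUFFER_VALID, the IB length in dwords and the VMID shifted
 * into bits 31:24 (vm_id << 24).
 */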
4160 /**
4161  * cik_ib_test - basic gfx ring IB test
4162  *
4163  * @rdev: radeon_device pointer
4164  * @ring: radeon_ring structure holding ring information
4165  *
4166  * Allocate an IB and execute it on the gfx ring (CIK).
4167  * Provides a basic gfx ring test to verify that IBs are working.
4168  * Returns 0 on success, error on failure.
4169  */
4170 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4171 {
4172         struct radeon_ib ib;
4173         uint32_t scratch;
4174         uint32_t tmp = 0;
4175         unsigned i;
4176         int r;
4177
4178         r = radeon_scratch_get(rdev, &scratch);
4179         if (r) {
4180                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4181                 return r;
4182         }
4183         WREG32(scratch, 0xCAFEDEAD);
4184         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4185         if (r) {
4186                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4187                 radeon_scratch_free(rdev, scratch);
4188                 return r;
4189         }
4190         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4191         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4192         ib.ptr[2] = 0xDEADBEEF;
4193         ib.length_dw = 3;
4194         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4195         if (r) {
4196                 radeon_scratch_free(rdev, scratch);
4197                 radeon_ib_free(rdev, &ib);
4198                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4199                 return r;
4200         }
4201         r = radeon_fence_wait(ib.fence, false);
4202         if (r) {
4203                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4204                 radeon_scratch_free(rdev, scratch);
4205                 radeon_ib_free(rdev, &ib);
4206                 return r;
4207         }
4208         for (i = 0; i < rdev->usec_timeout; i++) {
4209                 tmp = RREG32(scratch);
4210                 if (tmp == 0xDEADBEEF)
4211                         break;
4212                 DRM_UDELAY(1);
4213         }
4214         if (i < rdev->usec_timeout) {
4215                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4216         } else {
4217                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4218                           scratch, tmp);
4219                 r = -EINVAL;
4220         }
4221         radeon_scratch_free(rdev, scratch);
4222         radeon_ib_free(rdev, &ib);
4223         return r;
4224 }
4225
4226 /*
4227  * CP.
4228  * On CIK, gfx and compute now have independent command processors.
4229  *
4230  * GFX
4231  * Gfx consists of a single ring and can process both gfx jobs and
4232  * compute jobs.  The gfx CP consists of three microengines (ME):
4233  * PFP - Pre-Fetch Parser
4234  * ME - Micro Engine
4235  * CE - Constant Engine
4236  * The PFP and ME make up what is considered the Drawing Engine (DE).
4237  * The CE is an asynchronous engine used for updating buffer descriptors
4238  * used by the DE so that they can be loaded into cache in parallel
4239  * while the DE is processing state update packets.
4240  *
4241  * Compute
4242  * The compute CP consists of two microengines (ME):
4243  * MEC1 - Compute MicroEngine 1
4244  * MEC2 - Compute MicroEngine 2
4245  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4246  * The queues are exposed to userspace and are programmed directly
4247  * by the compute runtime.
4248  */
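/*
 * Editor's illustrative note (not part of the driver): the compute queue
 * count follows directly from that topology, e.g.
 *   KAVERI (2 MECs):         2 * 4 pipes * 8 queues = 64 queues
 *   BONAIRE/KABINI (1 MEC):  1 * 4 pipes * 8 queues = 32 queues
 * which is the same MEC/pipe/queue arithmetic cik_mec_init() documents
 * further down when it sets up the HPD EOP buffer.
 */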
4249 /**
4250  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the gfx MEs.
4256  */
4257 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4258 {
4259         if (enable)
4260                 WREG32(CP_ME_CNTL, 0);
4261         else {
4262                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4263                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4264                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4265                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4266         }
4267         udelay(50);
4268 }
4269
4270 /**
4271  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4272  *
4273  * @rdev: radeon_device pointer
4274  *
4275  * Loads the gfx PFP, ME, and CE ucode.
4276  * Returns 0 for success, -EINVAL if the ucode is not available.
4277  */
4278 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4279 {
4280         int i;
4281
4282         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4283                 return -EINVAL;
4284
4285         cik_cp_gfx_enable(rdev, false);
4286
4287         if (rdev->new_fw) {
4288                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4289                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4290                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4291                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4292                 const struct gfx_firmware_header_v1_0 *me_hdr =
4293                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4294                 const __le32 *fw_data;
4295                 u32 fw_size;
4296
4297                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4298                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4299                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4300
4301                 /* PFP */
4302                 fw_data = (const __le32 *)
4303                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4304                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4305                 WREG32(CP_PFP_UCODE_ADDR, 0);
4306                 for (i = 0; i < fw_size; i++)
4307                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4308                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4309
4310                 /* CE */
4311                 fw_data = (const __le32 *)
4312                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4313                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4314                 WREG32(CP_CE_UCODE_ADDR, 0);
4315                 for (i = 0; i < fw_size; i++)
4316                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4317                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4318
4319                 /* ME */
4320                 fw_data = (const __le32 *)
4321                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4322                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4323                 WREG32(CP_ME_RAM_WADDR, 0);
4324                 for (i = 0; i < fw_size; i++)
4325                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4326                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4327                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4328         } else {
4329                 const __be32 *fw_data;
4330
4331                 /* PFP */
4332                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4333                 WREG32(CP_PFP_UCODE_ADDR, 0);
4334                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4335                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4336                 WREG32(CP_PFP_UCODE_ADDR, 0);
4337
4338                 /* CE */
4339                 fw_data = (const __be32 *)rdev->ce_fw->data;
4340                 WREG32(CP_CE_UCODE_ADDR, 0);
4341                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4342                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4343                 WREG32(CP_CE_UCODE_ADDR, 0);
4344
4345                 /* ME */
4346                 fw_data = (const __be32 *)rdev->me_fw->data;
4347                 WREG32(CP_ME_RAM_WADDR, 0);
4348                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4349                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4350                 WREG32(CP_ME_RAM_WADDR, 0);
4351         }
4352
4353         return 0;
4354 }
4355
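/*
 * Editor's illustrative sketch (not part of the driver): both firmware
 * paths above follow the same "reset the address register, stream the
 * words, then write the final address/version" sequence.  A hypothetical
 * helper for the little-endian (new_fw) layout would look like this:
 */
static inline void cik_stream_ucode_example(struct radeon_device *rdev,
					    u32 addr_reg, u32 data_reg,
					    const __le32 *fw_data,
					    u32 fw_size, u32 version)
{
	u32 i;

	WREG32(addr_reg, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(data_reg, le32_to_cpup(fw_data++));
	WREG32(addr_reg, version);
}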
4356 /**
4357  * cik_cp_gfx_start - start the gfx ring
4358  *
4359  * @rdev: radeon_device pointer
4360  *
4361  * Enables the ring and loads the clear state context and other
4362  * packets required to init the ring.
4363  * Returns 0 for success, error for failure.
4364  */
4365 static int cik_cp_gfx_start(struct radeon_device *rdev)
4366 {
4367         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4368         int r, i;
4369
4370         /* init the CP */
4371         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4372         WREG32(CP_ENDIAN_SWAP, 0);
4373         WREG32(CP_DEVICE_ID, 1);
4374
4375         cik_cp_gfx_enable(rdev, true);
4376
4377         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4378         if (r) {
4379                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4380                 return r;
4381         }
4382
4383         /* init the CE partitions.  CE only used for gfx on CIK */
4384         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4385         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4386         radeon_ring_write(ring, 0x8000);
4387         radeon_ring_write(ring, 0x8000);
4388
4389         /* setup clear context state */
4390         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4391         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4392
4393         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4394         radeon_ring_write(ring, 0x80000000);
4395         radeon_ring_write(ring, 0x80000000);
4396
4397         for (i = 0; i < cik_default_size; i++)
4398                 radeon_ring_write(ring, cik_default_state[i]);
4399
4400         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4401         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4402
4403         /* set clear context state */
4404         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4405         radeon_ring_write(ring, 0);
4406
4407         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4408         radeon_ring_write(ring, 0x00000316);
4409         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4410         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4411
4412         radeon_ring_unlock_commit(rdev, ring, false);
4413
4414         return 0;
4415 }
4416
4417 /**
4418  * cik_cp_gfx_fini - stop the gfx ring
4419  *
4420  * @rdev: radeon_device pointer
4421  *
4422  * Stop the gfx ring and tear down the driver ring
4423  * info.
4424  */
4425 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4426 {
4427         cik_cp_gfx_enable(rdev, false);
4428         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4429 }
4430
4431 /**
4432  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4433  *
4434  * @rdev: radeon_device pointer
4435  *
4436  * Program the location and size of the gfx ring buffer
4437  * and test it to make sure it's working.
4438  * Returns 0 for success, error for failure.
4439  */
4440 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4441 {
4442         struct radeon_ring *ring;
4443         u32 tmp;
4444         u32 rb_bufsz;
4445         u64 rb_addr;
4446         int r;
4447
4448         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4449         if (rdev->family != CHIP_HAWAII)
4450                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4451
4452         /* Set the write pointer delay */
4453         WREG32(CP_RB_WPTR_DELAY, 0);
4454
4455         /* set the RB to use vmid 0 */
4456         WREG32(CP_RB_VMID, 0);
4457
4458         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4459
4460         /* ring 0 - compute and gfx */
4461         /* Set ring buffer size */
4462         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4463         rb_bufsz = order_base_2(ring->ring_size / 8);
4464         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4465 #ifdef __BIG_ENDIAN
4466         tmp |= BUF_SWAP_32BIT;
4467 #endif
4468         WREG32(CP_RB0_CNTL, tmp);
4469
4470         /* Initialize the ring buffer's read and write pointers */
4471         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4472         ring->wptr = 0;
4473         WREG32(CP_RB0_WPTR, ring->wptr);
4474
4475         /* set the wb address whether it's enabled or not */
4476         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4477         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4478
4479         /* scratch register shadowing is no longer supported */
4480         WREG32(SCRATCH_UMSK, 0);
4481
4482         if (!rdev->wb.enabled)
4483                 tmp |= RB_NO_UPDATE;
4484
4485         mdelay(1);
4486         WREG32(CP_RB0_CNTL, tmp);
4487
4488         rb_addr = ring->gpu_addr >> 8;
4489         WREG32(CP_RB0_BASE, rb_addr);
4490         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4491
4492         /* start the ring */
4493         cik_cp_gfx_start(rdev);
4494         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4495         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4496         if (r) {
4497                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4498                 return r;
4499         }
4500
4501         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4502                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4503
4504         return 0;
4505 }
4506
4507 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4508                      struct radeon_ring *ring)
4509 {
4510         u32 rptr;
4511
4512         if (rdev->wb.enabled)
4513                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4514         else
4515                 rptr = RREG32(CP_RB0_RPTR);
4516
4517         return rptr;
4518 }
4519
4520 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4521                      struct radeon_ring *ring)
4522 {
4523         u32 wptr;
4524
4525         wptr = RREG32(CP_RB0_WPTR);
4526
4527         return wptr;
4528 }
4529
4530 void cik_gfx_set_wptr(struct radeon_device *rdev,
4531                       struct radeon_ring *ring)
4532 {
4533         WREG32(CP_RB0_WPTR, ring->wptr);
4534         (void)RREG32(CP_RB0_WPTR);
4535 }
4536
4537 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4538                          struct radeon_ring *ring)
4539 {
4540         u32 rptr;
4541
4542         if (rdev->wb.enabled) {
4543                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4544         } else {
4545                 mutex_lock(&rdev->srbm_mutex);
4546                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4547                 rptr = RREG32(CP_HQD_PQ_RPTR);
4548                 cik_srbm_select(rdev, 0, 0, 0, 0);
4549                 mutex_unlock(&rdev->srbm_mutex);
4550         }
4551
4552         return rptr;
4553 }
4554
4555 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4556                          struct radeon_ring *ring)
4557 {
4558         u32 wptr;
4559
4560         if (rdev->wb.enabled) {
4561                 /* XXX check if swapping is necessary on BE */
4562                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4563         } else {
4564                 mutex_lock(&rdev->srbm_mutex);
4565                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4566                 wptr = RREG32(CP_HQD_PQ_WPTR);
4567                 cik_srbm_select(rdev, 0, 0, 0, 0);
4568                 mutex_unlock(&rdev->srbm_mutex);
4569         }
4570
4571         return wptr;
4572 }
4573
4574 void cik_compute_set_wptr(struct radeon_device *rdev,
4575                           struct radeon_ring *ring)
4576 {
4577         /* XXX check if swapping is necessary on BE */
4578         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4579         WDOORBELL32(ring->doorbell_index, ring->wptr);
4580 }
4581
4582 /**
4583  * cik_cp_compute_enable - enable/disable the compute CP MEs
4584  *
4585  * @rdev: radeon_device pointer
4586  * @enable: enable or disable the MEs
4587  *
4588  * Halts or unhalts the compute MEs.
4589  */
4590 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4591 {
4592         if (enable)
4593                 WREG32(CP_MEC_CNTL, 0);
4594         else {
4595                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4596                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4597                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4598         }
4599         udelay(50);
4600 }
4601
4602 /**
4603  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4604  *
4605  * @rdev: radeon_device pointer
4606  *
4607  * Loads the compute MEC1&2 ucode.
4608  * Returns 0 for success, -EINVAL if the ucode is not available.
4609  */
4610 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4611 {
4612         int i;
4613
4614         if (!rdev->mec_fw)
4615                 return -EINVAL;
4616
4617         cik_cp_compute_enable(rdev, false);
4618
4619         if (rdev->new_fw) {
4620                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4621                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4622                 const __le32 *fw_data;
4623                 u32 fw_size;
4624
4625                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4626
4627                 /* MEC1 */
4628                 fw_data = (const __le32 *)
4629                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4630                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4631                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4632                 for (i = 0; i < fw_size; i++)
4633                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4634                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4635
4636                 /* MEC2 */
4637                 if (rdev->family == CHIP_KAVERI) {
4638                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4639                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4640
4641                         fw_data = (const __le32 *)
4642                                 (rdev->mec2_fw->data +
4643                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4644                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4645                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4646                         for (i = 0; i < fw_size; i++)
4647                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4648                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4649                 }
4650         } else {
4651                 const __be32 *fw_data;
4652
4653                 /* MEC1 */
4654                 fw_data = (const __be32 *)rdev->mec_fw->data;
4655                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4656                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4657                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4658                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4659
4660                 if (rdev->family == CHIP_KAVERI) {
4661                         /* MEC2 */
4662                         fw_data = (const __be32 *)rdev->mec_fw->data;
4663                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4664                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4665                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4666                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4667                 }
4668         }
4669
4670         return 0;
4671 }
4672
4673 /**
4674  * cik_cp_compute_start - start the compute queues
4675  *
4676  * @rdev: radeon_device pointer
4677  *
4678  * Enable the compute queues.
4679  * Returns 0 for success, error for failure.
4680  */
4681 static int cik_cp_compute_start(struct radeon_device *rdev)
4682 {
4683         cik_cp_compute_enable(rdev, true);
4684
4685         return 0;
4686 }
4687
4688 /**
4689  * cik_cp_compute_fini - stop the compute queues
4690  *
4691  * @rdev: radeon_device pointer
4692  *
4693  * Stop the compute queues and tear down the driver queue
4694  * info.
4695  */
4696 static void cik_cp_compute_fini(struct radeon_device *rdev)
4697 {
4698         int i, idx, r;
4699
4700         cik_cp_compute_enable(rdev, false);
4701
4702         for (i = 0; i < 2; i++) {
4703                 if (i == 0)
4704                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4705                 else
4706                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4707
4708                 if (rdev->ring[idx].mqd_obj) {
4709                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4710                         if (unlikely(r != 0))
4711                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4712
4713                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4714                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4715
4716                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4717                         rdev->ring[idx].mqd_obj = NULL;
4718                 }
4719         }
4720 }
4721
4722 static void cik_mec_fini(struct radeon_device *rdev)
4723 {
4724         int r;
4725
4726         if (rdev->mec.hpd_eop_obj) {
4727                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4728                 if (unlikely(r != 0))
4729                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4730                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4731                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4732
4733                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4734                 rdev->mec.hpd_eop_obj = NULL;
4735         }
4736 }
4737
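/* Size in bytes of one HPD EOP buffer; used to size the EOP bo allocated in
 * cik_mec_init() and the EOP ring size written to CP_HPD_EOP_CONTROL in
 * cik_cp_compute_resume().
 */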
4738 #define MEC_HPD_SIZE 2048
4739
4740 static int cik_mec_init(struct radeon_device *rdev)
4741 {
4742         int r;
4743         u32 *hpd;
4744
4745         /*
4746          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4747          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4748          * Nonetheless, we assign only 1 pipe because all other pipes will
4749          * be handled by KFD
4750          */
4751         rdev->mec.num_mec = 1;
4752         rdev->mec.num_pipe = 1;
4753         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4754
4755         if (rdev->mec.hpd_eop_obj == NULL) {
4756                 r = radeon_bo_create(rdev,
4757                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4758                                      PAGE_SIZE, true,
4759                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4760                                      &rdev->mec.hpd_eop_obj);
4761                 if (r) {
4762                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4763                         return r;
4764                 }
4765         }
4766
4767         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4768         if (unlikely(r != 0)) {
4769                 cik_mec_fini(rdev);
4770                 return r;
4771         }
4772         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4773                           &rdev->mec.hpd_eop_gpu_addr);
4774         if (r) {
4775                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4776                 cik_mec_fini(rdev);
4777                 return r;
4778         }
4779         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4780         if (r) {
4781                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4782                 cik_mec_fini(rdev);
4783                 return r;
4784         }
4785
4786         /* clear memory.  Not sure if this is required or not */
4787         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4788
4789         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4790         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4791
4792         return 0;
4793 }
4794
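/* Per-queue CP_MQD_* and CP_HQD_* register state kept in the MQD;
 * cik_cp_compute_resume() fills this in and writes it out to the hardware
 * queue descriptor registers when a compute queue is brought up.
 */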
4795 struct hqd_registers
4796 {
4797         u32 cp_mqd_base_addr;
4798         u32 cp_mqd_base_addr_hi;
4799         u32 cp_hqd_active;
4800         u32 cp_hqd_vmid;
4801         u32 cp_hqd_persistent_state;
4802         u32 cp_hqd_pipe_priority;
4803         u32 cp_hqd_queue_priority;
4804         u32 cp_hqd_quantum;
4805         u32 cp_hqd_pq_base;
4806         u32 cp_hqd_pq_base_hi;
4807         u32 cp_hqd_pq_rptr;
4808         u32 cp_hqd_pq_rptr_report_addr;
4809         u32 cp_hqd_pq_rptr_report_addr_hi;
4810         u32 cp_hqd_pq_wptr_poll_addr;
4811         u32 cp_hqd_pq_wptr_poll_addr_hi;
4812         u32 cp_hqd_pq_doorbell_control;
4813         u32 cp_hqd_pq_wptr;
4814         u32 cp_hqd_pq_control;
4815         u32 cp_hqd_ib_base_addr;
4816         u32 cp_hqd_ib_base_addr_hi;
4817         u32 cp_hqd_ib_rptr;
4818         u32 cp_hqd_ib_control;
4819         u32 cp_hqd_iq_timer;
4820         u32 cp_hqd_iq_rptr;
4821         u32 cp_hqd_dequeue_request;
4822         u32 cp_hqd_dma_offload;
4823         u32 cp_hqd_sema_cmd;
4824         u32 cp_hqd_msg_type;
4825         u32 cp_hqd_atomic0_preop_lo;
4826         u32 cp_hqd_atomic0_preop_hi;
4827         u32 cp_hqd_atomic1_preop_lo;
4828         u32 cp_hqd_atomic1_preop_hi;
4829         u32 cp_hqd_hq_scheduler0;
4830         u32 cp_hqd_hq_scheduler1;
4831         u32 cp_mqd_control;
4832 };
4833
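/* Memory queue descriptor (MQD) for a CIK compute queue; the bo backing it
 * is created in cik_cp_compute_resume() and its GPU address is written to
 * CP_MQD_BASE_ADDR.
 */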
4834 struct bonaire_mqd
4835 {
4836         u32 header;
4837         u32 dispatch_initiator;
4838         u32 dimensions[3];
4839         u32 start_idx[3];
4840         u32 num_threads[3];
4841         u32 pipeline_stat_enable;
4842         u32 perf_counter_enable;
4843         u32 pgm[2];
4844         u32 tba[2];
4845         u32 tma[2];
4846         u32 pgm_rsrc[2];
4847         u32 vmid;
4848         u32 resource_limits;
4849         u32 static_thread_mgmt01[2];
4850         u32 tmp_ring_size;
4851         u32 static_thread_mgmt23[2];
4852         u32 restart[3];
4853         u32 thread_trace_enable;
4854         u32 reserved1;
4855         u32 user_data[16];
4856         u32 vgtcs_invoke_count[2];
4857         struct hqd_registers queue_state;
4858         u32 dequeue_cntr;
4859         u32 interrupt_queue[64];
4860 };
4861
4862 /**
4863  * cik_cp_compute_resume - setup the compute queue registers
4864  *
4865  * @rdev: radeon_device pointer
4866  *
4867  * Program the compute queues and test them to make sure they
4868  * are working.
4869  * Returns 0 for success, error for failure.
4870  */
4871 static int cik_cp_compute_resume(struct radeon_device *rdev)
4872 {
4873         int r, i, j, idx;
4874         u32 tmp;
4875         bool use_doorbell = true;
4876         u64 hqd_gpu_addr;
4877         u64 mqd_gpu_addr;
4878         u64 eop_gpu_addr;
4879         u64 wb_gpu_addr;
4880         u32 *buf;
4881         struct bonaire_mqd *mqd;
4882
4883         r = cik_cp_compute_start(rdev);
4884         if (r)
4885                 return r;
4886
4887         /* fix up chicken bits */
4888         tmp = RREG32(CP_CPF_DEBUG);
4889         tmp |= (1 << 23);
4890         WREG32(CP_CPF_DEBUG, tmp);
4891
4892         /* init the pipes */
4893         mutex_lock(&rdev->srbm_mutex);
4894
4895         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4896
4897         cik_srbm_select(rdev, 0, 0, 0, 0);
4898
4899         /* write the EOP addr */
4900         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4901         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4902
4903         /* set the VMID assigned */
4904         WREG32(CP_HPD_EOP_VMID, 0);
4905
4906         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4907         tmp = RREG32(CP_HPD_EOP_CONTROL);
4908         tmp &= ~EOP_SIZE_MASK;
4909         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4910         WREG32(CP_HPD_EOP_CONTROL, tmp);
4911
4912         mutex_unlock(&rdev->srbm_mutex);
4913
4914         /* init the queues.  Just two for now. */
4915         for (i = 0; i < 2; i++) {
4916                 if (i == 0)
4917                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4918                 else
4919                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4920
4921                 if (rdev->ring[idx].mqd_obj == NULL) {
4922                         r = radeon_bo_create(rdev,
4923                                              sizeof(struct bonaire_mqd),
4924                                              PAGE_SIZE, true,
4925                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4926                                              NULL, &rdev->ring[idx].mqd_obj);
4927                         if (r) {
4928                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4929                                 return r;
4930                         }
4931                 }
4932
4933                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4934                 if (unlikely(r != 0)) {
4935                         cik_cp_compute_fini(rdev);
4936                         return r;
4937                 }
4938                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4939                                   &mqd_gpu_addr);
4940                 if (r) {
4941                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4942                         cik_cp_compute_fini(rdev);
4943                         return r;
4944                 }
4945                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4946                 if (r) {
4947                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4948                         cik_cp_compute_fini(rdev);
4949                         return r;
4950                 }
4951
4952                 /* init the mqd struct */
4953                 memset(buf, 0, sizeof(struct bonaire_mqd));
4954
4955                 mqd = (struct bonaire_mqd *)buf;
4956                 mqd->header = 0xC0310800;
4957                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4958                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4959                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4960                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4961
4962                 mutex_lock(&rdev->srbm_mutex);
4963                 cik_srbm_select(rdev, rdev->ring[idx].me,
4964                                 rdev->ring[idx].pipe,
4965                                 rdev->ring[idx].queue, 0);
4966
4967                 /* disable wptr polling */
4968                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4969                 tmp &= ~WPTR_POLL_EN;
4970                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4971
4972                 /* enable doorbell? */
4973                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4974                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4975                 if (use_doorbell)
4976                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4977                 else
4978                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4979                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4980                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4981
4982                 /* disable the queue if it's active */
4983                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4984                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4985                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4986                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4987                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4988                         for (j = 0; j < rdev->usec_timeout; j++) {
4989                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4990                                         break;
4991                                 udelay(1);
4992                         }
4993                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4994                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4995                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4996                 }
4997
4998                 /* set the pointer to the MQD */
4999                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5000                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5001                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5002                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5003                 /* set MQD vmid to 0 */
5004                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5005                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5006                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5007
5008                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5009                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5010                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5011                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5012                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5013                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5014
5015                 /* set up the HQD, this is similar to CP_RB0_CNTL */
5016                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5017                 mqd->queue_state.cp_hqd_pq_control &=
5018                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5019
5020                 mqd->queue_state.cp_hqd_pq_control |=
5021                         order_base_2(rdev->ring[idx].ring_size / 8);
5022                 mqd->queue_state.cp_hqd_pq_control |=
5023                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5024 #ifdef __BIG_ENDIAN
5025                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5026 #endif
5027                 mqd->queue_state.cp_hqd_pq_control &=
5028                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5029                 mqd->queue_state.cp_hqd_pq_control |=
5030                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5031                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5032
5033                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5034                 if (i == 0)
5035                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5036                 else
5037                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5038                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5039                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5040                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5041                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5042                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5043
5044                 /* set the wb address whether it's enabled or not */
5045                 if (i == 0)
5046                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5047                 else
5048                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5049                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5050                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5051                         upper_32_bits(wb_gpu_addr) & 0xffff;
5052                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5053                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5054                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5055                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5056
5057                 /* enable the doorbell if requested */
5058                 if (use_doorbell) {
5059                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5060                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5061                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5062                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5063                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5064                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5065                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5066                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5067
5068                 } else {
5069                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5070                 }
5071                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5072                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5073
5074                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5075                 rdev->ring[idx].wptr = 0;
5076                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5077                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5078                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5079
5080                 /* set the vmid for the queue */
5081                 mqd->queue_state.cp_hqd_vmid = 0;
5082                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5083
5084                 /* activate the queue */
5085                 mqd->queue_state.cp_hqd_active = 1;
5086                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5087
5088                 cik_srbm_select(rdev, 0, 0, 0, 0);
5089                 mutex_unlock(&rdev->srbm_mutex);
5090
5091                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5092                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5093
5094                 rdev->ring[idx].ready = true;
5095                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5096                 if (r)
5097                         rdev->ring[idx].ready = false;
5098         }
5099
5100         return 0;
5101 }
5102
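/* The cik_cp_* helpers below drive the GFX and compute command processors
 * together.
 */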
5103 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5104 {
5105         cik_cp_gfx_enable(rdev, enable);
5106         cik_cp_compute_enable(rdev, enable);
5107 }
5108
5109 static int cik_cp_load_microcode(struct radeon_device *rdev)
5110 {
5111         int r;
5112
5113         r = cik_cp_gfx_load_microcode(rdev);
5114         if (r)
5115                 return r;
5116         r = cik_cp_compute_load_microcode(rdev);
5117         if (r)
5118                 return r;
5119
5120         return 0;
5121 }
5122
5123 static void cik_cp_fini(struct radeon_device *rdev)
5124 {
5125         cik_cp_gfx_fini(rdev);
5126         cik_cp_compute_fini(rdev);
5127 }
5128
5129 static int cik_cp_resume(struct radeon_device *rdev)
5130 {
5131         int r;
5132
5133         cik_enable_gui_idle_interrupt(rdev, false);
5134
5135         r = cik_cp_load_microcode(rdev);
5136         if (r)
5137                 return r;
5138
5139         r = cik_cp_gfx_resume(rdev);
5140         if (r)
5141                 return r;
5142         r = cik_cp_compute_resume(rdev);
5143         if (r)
5144                 return r;
5145
5146         cik_enable_gui_idle_interrupt(rdev, true);
5147
5148         return 0;
5149 }
5150
5151 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5152 {
5153         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5154                 RREG32(GRBM_STATUS));
5155         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5156                 RREG32(GRBM_STATUS2));
5157         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5158                 RREG32(GRBM_STATUS_SE0));
5159         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5160                 RREG32(GRBM_STATUS_SE1));
5161         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5162                 RREG32(GRBM_STATUS_SE2));
5163         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5164                 RREG32(GRBM_STATUS_SE3));
5165         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5166                 RREG32(SRBM_STATUS));
5167         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5168                 RREG32(SRBM_STATUS2));
5169         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5170                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5171         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5172                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5173         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5174         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5175                  RREG32(CP_STALLED_STAT1));
5176         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5177                  RREG32(CP_STALLED_STAT2));
5178         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5179                  RREG32(CP_STALLED_STAT3));
5180         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5181                  RREG32(CP_CPF_BUSY_STAT));
5182         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5183                  RREG32(CP_CPF_STALLED_STAT1));
5184         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5185         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5186         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5187                  RREG32(CP_CPC_STALLED_STAT1));
5188         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5189 }
5190
5191 /**
5192  * cik_gpu_check_soft_reset - check which blocks are busy
5193  *
5194  * @rdev: radeon_device pointer
5195  *
5196  * Check which blocks are busy and return the relevant reset
5197  * mask to be used by cik_gpu_soft_reset().
5198  * Returns a mask of the blocks to be reset.
5199  */
5200 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5201 {
5202         u32 reset_mask = 0;
5203         u32 tmp;
5204
5205         /* GRBM_STATUS */
5206         tmp = RREG32(GRBM_STATUS);
5207         if (tmp & (PA_BUSY | SC_BUSY |
5208                    BCI_BUSY | SX_BUSY |
5209                    TA_BUSY | VGT_BUSY |
5210                    DB_BUSY | CB_BUSY |
5211                    GDS_BUSY | SPI_BUSY |
5212                    IA_BUSY | IA_BUSY_NO_DMA))
5213                 reset_mask |= RADEON_RESET_GFX;
5214
5215         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5216                 reset_mask |= RADEON_RESET_CP;
5217
5218         /* GRBM_STATUS2 */
5219         tmp = RREG32(GRBM_STATUS2);
5220         if (tmp & RLC_BUSY)
5221                 reset_mask |= RADEON_RESET_RLC;
5222
5223         /* SDMA0_STATUS_REG */
5224         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5225         if (!(tmp & SDMA_IDLE))
5226                 reset_mask |= RADEON_RESET_DMA;
5227
5228         /* SDMA1_STATUS_REG */
5229         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5230         if (!(tmp & SDMA_IDLE))
5231                 reset_mask |= RADEON_RESET_DMA1;
5232
5233         /* SRBM_STATUS2 */
5234         tmp = RREG32(SRBM_STATUS2);
5235         if (tmp & SDMA_BUSY)
5236                 reset_mask |= RADEON_RESET_DMA;
5237
5238         if (tmp & SDMA1_BUSY)
5239                 reset_mask |= RADEON_RESET_DMA1;
5240
5241         /* SRBM_STATUS */
5242         tmp = RREG32(SRBM_STATUS);
5243
5244         if (tmp & IH_BUSY)
5245                 reset_mask |= RADEON_RESET_IH;
5246
5247         if (tmp & SEM_BUSY)
5248                 reset_mask |= RADEON_RESET_SEM;
5249
5250         if (tmp & GRBM_RQ_PENDING)
5251                 reset_mask |= RADEON_RESET_GRBM;
5252
5253         if (tmp & VMC_BUSY)
5254                 reset_mask |= RADEON_RESET_VMC;
5255
5256         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5257                    MCC_BUSY | MCD_BUSY))
5258                 reset_mask |= RADEON_RESET_MC;
5259
5260         if (evergreen_is_display_hung(rdev))
5261                 reset_mask |= RADEON_RESET_DISPLAY;
5262
5263         /* Skip MC reset as it's most likely not hung, just busy */
5264         if (reset_mask & RADEON_RESET_MC) {
5265                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5266                 reset_mask &= ~RADEON_RESET_MC;
5267         }
5268
5269         return reset_mask;
5270 }
5271
5272 /**
5273  * cik_gpu_soft_reset - soft reset GPU
5274  *
5275  * @rdev: radeon_device pointer
5276  * @reset_mask: mask of which blocks to reset
5277  *
5278  * Soft reset the blocks specified in @reset_mask.
5279  */
5280 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5281 {
5282         struct evergreen_mc_save save;
5283         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5284         u32 tmp;
5285
5286         if (reset_mask == 0)
5287                 return;
5288
5289         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5290
5291         cik_print_gpu_status_regs(rdev);
5292         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5293                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5294         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5295                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5296
5297         /* disable CG/PG */
5298         cik_fini_pg(rdev);
5299         cik_fini_cg(rdev);
5300
5301         /* stop the rlc */
5302         cik_rlc_stop(rdev);
5303
5304         /* Disable GFX parsing/prefetching */
5305         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5306
5307         /* Disable MEC parsing/prefetching */
5308         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5309
5310         if (reset_mask & RADEON_RESET_DMA) {
5311                 /* sdma0 */
5312                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5313                 tmp |= SDMA_HALT;
5314                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5315         }
5316         if (reset_mask & RADEON_RESET_DMA1) {
5317                 /* sdma1 */
5318                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5319                 tmp |= SDMA_HALT;
5320                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5321         }
5322
5323         evergreen_mc_stop(rdev, &save);
5324         if (evergreen_mc_wait_for_idle(rdev)) {
5325                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5326         }
5327
5328         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5329                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5330
5331         if (reset_mask & RADEON_RESET_CP) {
5332                 grbm_soft_reset |= SOFT_RESET_CP;
5333
5334                 srbm_soft_reset |= SOFT_RESET_GRBM;
5335         }
5336
5337         if (reset_mask & RADEON_RESET_DMA)
5338                 srbm_soft_reset |= SOFT_RESET_SDMA;
5339
5340         if (reset_mask & RADEON_RESET_DMA1)
5341                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5342
5343         if (reset_mask & RADEON_RESET_DISPLAY)
5344                 srbm_soft_reset |= SOFT_RESET_DC;
5345
5346         if (reset_mask & RADEON_RESET_RLC)
5347                 grbm_soft_reset |= SOFT_RESET_RLC;
5348
5349         if (reset_mask & RADEON_RESET_SEM)
5350                 srbm_soft_reset |= SOFT_RESET_SEM;
5351
5352         if (reset_mask & RADEON_RESET_IH)
5353                 srbm_soft_reset |= SOFT_RESET_IH;
5354
5355         if (reset_mask & RADEON_RESET_GRBM)
5356                 srbm_soft_reset |= SOFT_RESET_GRBM;
5357
5358         if (reset_mask & RADEON_RESET_VMC)
5359                 srbm_soft_reset |= SOFT_RESET_VMC;
5360
5361         if (!(rdev->flags & RADEON_IS_IGP)) {
5362                 if (reset_mask & RADEON_RESET_MC)
5363                         srbm_soft_reset |= SOFT_RESET_MC;
5364         }
5365
5366         if (grbm_soft_reset) {
5367                 tmp = RREG32(GRBM_SOFT_RESET);
5368                 tmp |= grbm_soft_reset;
5369                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5370                 WREG32(GRBM_SOFT_RESET, tmp);
5371                 tmp = RREG32(GRBM_SOFT_RESET);
5372
5373                 udelay(50);
5374
5375                 tmp &= ~grbm_soft_reset;
5376                 WREG32(GRBM_SOFT_RESET, tmp);
5377                 tmp = RREG32(GRBM_SOFT_RESET);
5378         }
5379
5380         if (srbm_soft_reset) {
5381                 tmp = RREG32(SRBM_SOFT_RESET);
5382                 tmp |= srbm_soft_reset;
5383                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5384                 WREG32(SRBM_SOFT_RESET, tmp);
5385                 tmp = RREG32(SRBM_SOFT_RESET);
5386
5387                 udelay(50);
5388
5389                 tmp &= ~srbm_soft_reset;
5390                 WREG32(SRBM_SOFT_RESET, tmp);
5391                 tmp = RREG32(SRBM_SOFT_RESET);
5392         }
5393
5394         /* Wait a little for things to settle down */
5395         udelay(50);
5396
5397         evergreen_mc_resume(rdev, &save);
5398         udelay(50);
5399
5400         cik_print_gpu_status_regs(rdev);
5401 }
5402
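/* GMCON state saved and restored around a PCI config reset on IGP parts */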
5403 struct kv_reset_save_regs {
5404         u32 gmcon_reng_execute;
5405         u32 gmcon_misc;
5406         u32 gmcon_misc3;
5407 };
5408
5409 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5410                                    struct kv_reset_save_regs *save)
5411 {
5412         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5413         save->gmcon_misc = RREG32(GMCON_MISC);
5414         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5415
5416         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5417         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5418                                                 STCTRL_STUTTER_EN));
5419 }
5420
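/* Replay the GMCON PGFSM config/write sequence (hardware-specific magic
 * values) after a PCI config reset, then restore the GMCON registers saved
 * by kv_save_regs_for_reset().
 */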
5421 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5422                                       struct kv_reset_save_regs *save)
5423 {
5424         int i;
5425
5426         WREG32(GMCON_PGFSM_WRITE, 0);
5427         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5428
5429         for (i = 0; i < 5; i++)
5430                 WREG32(GMCON_PGFSM_WRITE, 0);
5431
5432         WREG32(GMCON_PGFSM_WRITE, 0);
5433         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5434
5435         for (i = 0; i < 5; i++)
5436                 WREG32(GMCON_PGFSM_WRITE, 0);
5437
5438         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5439         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5440
5441         for (i = 0; i < 5; i++)
5442                 WREG32(GMCON_PGFSM_WRITE, 0);
5443
5444         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5445         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5446
5447         for (i = 0; i < 5; i++)
5448                 WREG32(GMCON_PGFSM_WRITE, 0);
5449
5450         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5451         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5452
5453         for (i = 0; i < 5; i++)
5454                 WREG32(GMCON_PGFSM_WRITE, 0);
5455
5456         WREG32(GMCON_PGFSM_WRITE, 0);
5457         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5458
5459         for (i = 0; i < 5; i++)
5460                 WREG32(GMCON_PGFSM_WRITE, 0);
5461
5462         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5463         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5464
5465         for (i = 0; i < 5; i++)
5466                 WREG32(GMCON_PGFSM_WRITE, 0);
5467
5468         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5469         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5470
5471         for (i = 0; i < 5; i++)
5472                 WREG32(GMCON_PGFSM_WRITE, 0);
5473
5474         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5475         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5476
5477         for (i = 0; i < 5; i++)
5478                 WREG32(GMCON_PGFSM_WRITE, 0);
5479
5480         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5481         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5482
5483         for (i = 0; i < 5; i++)
5484                 WREG32(GMCON_PGFSM_WRITE, 0);
5485
5486         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5487         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5488
5489         WREG32(GMCON_MISC3, save->gmcon_misc3);
5490         WREG32(GMCON_MISC, save->gmcon_misc);
5491         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5492 }
5493
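/**
 * cik_gpu_pci_config_reset - reset the GPU via a PCI config reset
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP, MEC, SDMA and RLC engines, stop memory access, save the
 * GMCON state on IGP parts, trigger a PCI config reset and wait for the
 * ASIC to come back out of reset.
 */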
5494 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5495 {
5496         struct evergreen_mc_save save;
5497         struct kv_reset_save_regs kv_save = { 0 };
5498         u32 tmp, i;
5499
5500         dev_info(rdev->dev, "GPU pci config reset\n");
5501
5502         /* disable dpm? */
5503
5504         /* disable cg/pg */
5505         cik_fini_pg(rdev);
5506         cik_fini_cg(rdev);
5507
5508         /* Disable GFX parsing/prefetching */
5509         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5510
5511         /* Disable MEC parsing/prefetching */
5512         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5513
5514         /* sdma0 */
5515         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5516         tmp |= SDMA_HALT;
5517         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5518         /* sdma1 */
5519         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5520         tmp |= SDMA_HALT;
5521         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5522         /* XXX other engines? */
5523
5524         /* halt the rlc, disable cp internal ints */
5525         cik_rlc_stop(rdev);
5526
5527         udelay(50);
5528
5529         /* disable mem access */
5530         evergreen_mc_stop(rdev, &save);
5531         if (evergreen_mc_wait_for_idle(rdev)) {
5532                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5533         }
5534
5535         if (rdev->flags & RADEON_IS_IGP)
5536                 kv_save_regs_for_reset(rdev, &kv_save);
5537
5538         /* disable BM */
5539         pci_clear_master(rdev->pdev);
5540         /* reset */
5541         radeon_pci_config_reset(rdev);
5542
5543         udelay(100);
5544
5545         /* wait for asic to come out of reset */
5546         for (i = 0; i < rdev->usec_timeout; i++) {
5547                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5548                         break;
5549                 udelay(1);
5550         }
5551
5552         /* does asic init need to be run first??? */
5553         if (rdev->flags & RADEON_IS_IGP)
5554                 kv_restore_regs_for_reset(rdev, &kv_save);
5555 }
5556
5557 /**
5558  * cik_asic_reset - soft reset GPU
5559  *
5560  * @rdev: radeon_device pointer
5561  *
5562  * Look up which blocks are hung and attempt
5563  * to reset them.
5564  * Returns 0 for success.
5565  */
5566 int cik_asic_reset(struct radeon_device *rdev)
5567 {
5568         u32 reset_mask;
5569
5570         reset_mask = cik_gpu_check_soft_reset(rdev);
5571
5572         if (reset_mask)
5573                 r600_set_bios_scratch_engine_hung(rdev, true);
5574
5575         /* try soft reset */
5576         cik_gpu_soft_reset(rdev, reset_mask);
5577
5578         reset_mask = cik_gpu_check_soft_reset(rdev);
5579
5580         /* try pci config reset */
5581         if (reset_mask && radeon_hard_reset)
5582                 cik_gpu_pci_config_reset(rdev);
5583
5584         reset_mask = cik_gpu_check_soft_reset(rdev);
5585
5586         if (!reset_mask)
5587                 r600_set_bios_scratch_engine_hung(rdev, false);
5588
5589         return 0;
5590 }
5591
5592 /**
5593  * cik_gfx_is_lockup - check if the 3D engine is locked up
5594  *
5595  * @rdev: radeon_device pointer
5596  * @ring: radeon_ring structure holding ring information
5597  *
5598  * Check if the 3D engine is locked up (CIK).
5599  * Returns true if the engine is locked, false if not.
5600  */
5601 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5602 {
5603         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5604
5605         if (!(reset_mask & (RADEON_RESET_GFX |
5606                             RADEON_RESET_COMPUTE |
5607                             RADEON_RESET_CP))) {
5608                 radeon_ring_lockup_update(rdev, ring);
5609                 return false;
5610         }
5611         return radeon_ring_test_lockup(rdev, ring);
5612 }
5613
5614 /* MC */
5615 /**
5616  * cik_mc_program - program the GPU memory controller
5617  *
5618  * @rdev: radeon_device pointer
5619  *
5620  * Set the location of vram, gart, and AGP in the GPU's
5621  * physical address space (CIK).
5622  */
5623 static void cik_mc_program(struct radeon_device *rdev)
5624 {
5625         struct evergreen_mc_save save;
5626         u32 tmp;
5627         int i, j;
5628
5629         /* Initialize HDP */
5630         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5631                 WREG32((0x2c14 + j), 0x00000000);
5632                 WREG32((0x2c18 + j), 0x00000000);
5633                 WREG32((0x2c1c + j), 0x00000000);
5634                 WREG32((0x2c20 + j), 0x00000000);
5635                 WREG32((0x2c24 + j), 0x00000000);
5636         }
5637         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5638
5639         evergreen_mc_stop(rdev, &save);
5640         if (radeon_mc_wait_for_idle(rdev)) {
5641                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5642         }
5643         /* Lockout access through VGA aperture*/
5644         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5645         /* Update configuration */
5646         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5647                rdev->mc.vram_start >> 12);
5648         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5649                rdev->mc.vram_end >> 12);
5650         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5651                rdev->vram_scratch.gpu_addr >> 12);
5652         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5653         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5654         WREG32(MC_VM_FB_LOCATION, tmp);
5655         /* XXX double check these! */
5656         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5657         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5658         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5659         WREG32(MC_VM_AGP_BASE, 0);
5660         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5661         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5662         if (radeon_mc_wait_for_idle(rdev)) {
5663                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5664         }
5665         evergreen_mc_resume(rdev, &save);
5666         /* we need to own VRAM, so turn off the VGA renderer here
5667          * to stop it overwriting our objects */
5668         rv515_vga_render_disable(rdev);
5669 }
5670
5671 /**
5672  * cik_mc_init - initialize the memory controller driver params
5673  *
5674  * @rdev: radeon_device pointer
5675  *
5676  * Look up the amount of vram, vram width, and decide how to place
5677  * vram and gart within the GPU's physical address space (CIK).
5678  * Returns 0 for success.
5679  */
5680 static int cik_mc_init(struct radeon_device *rdev)
5681 {
5682         u32 tmp;
5683         int chansize, numchan;
5684
5685         /* Get VRAM information */
5686         rdev->mc.vram_is_ddr = true;
5687         tmp = RREG32(MC_ARB_RAMCFG);
5688         if (tmp & CHANSIZE_MASK) {
5689                 chansize = 64;
5690         } else {
5691                 chansize = 32;
5692         }
5693         tmp = RREG32(MC_SHARED_CHMAP);
5694         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5695         case 0:
5696         default:
5697                 numchan = 1;
5698                 break;
5699         case 1:
5700                 numchan = 2;
5701                 break;
5702         case 2:
5703                 numchan = 4;
5704                 break;
5705         case 3:
5706                 numchan = 8;
5707                 break;
5708         case 4:
5709                 numchan = 3;
5710                 break;
5711         case 5:
5712                 numchan = 6;
5713                 break;
5714         case 6:
5715                 numchan = 10;
5716                 break;
5717         case 7:
5718                 numchan = 12;
5719                 break;
5720         case 8:
5721                 numchan = 16;
5722                 break;
5723         }
5724         rdev->mc.vram_width = numchan * chansize;
5725         /* Could aper size report 0 ? */
5726         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5727         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5728         /* size in MB on CIK */
5729         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5730         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5731         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5732         si_vram_gtt_location(rdev, &rdev->mc);
5733         radeon_update_bandwidth_info(rdev);
5734
5735         return 0;
5736 }
5737
5738 /*
5739  * GART
5740  * VMID 0 is the physical GPU addresses as used by the kernel.
5741  * VMIDs 1-15 are used for userspace clients and are handled
5742  * by the radeon vm/hsa code.
5743  */
5744 /**
5745  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5746  *
5747  * @rdev: radeon_device pointer
5748  *
5749  * Flush the TLB for the VMID 0 page table (CIK).
5750  */
5751 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5752 {
5753         /* flush hdp cache */
5754         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5755
5756         /* bits 0-15 are the VM contexts 0-15 */
5757         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5758 }
5759
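/**
 * cik_pcie_init_compute_vmid - init SH_MEM apertures for the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Program the SH_MEM aperture registers for VMIDs 8-15, which are reserved
 * for compute clients managed by amdkfd.
 */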
5760 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5761 {
5762         int i;
5763         uint32_t sh_mem_bases, sh_mem_config;
5764
5765         sh_mem_bases = 0x6000 | 0x6000 << 16;
5766         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5767         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5768
5769         mutex_lock(&rdev->srbm_mutex);
5770         for (i = 8; i < 16; i++) {
5771                 cik_srbm_select(rdev, 0, 0, 0, i);
5772                 /* CP and shaders */
5773                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5774                 WREG32(SH_MEM_APE1_BASE, 1);
5775                 WREG32(SH_MEM_APE1_LIMIT, 0);
5776                 WREG32(SH_MEM_BASES, sh_mem_bases);
5777         }
5778         cik_srbm_select(rdev, 0, 0, 0, 0);
5779         mutex_unlock(&rdev->srbm_mutex);
5780 }
5781
5782 /**
5783  * cik_pcie_gart_enable - gart enable
5784  *
5785  * @rdev: radeon_device pointer
5786  *
5787  * This sets up the TLBs, programs the page tables for VMID0,
5788  * sets up the hw for VMIDs 1-15 which are allocated on
5789  * demand, and sets up the global locations for the LDS, GDS,
5790  * and GPUVM for FSA64 clients (CIK).
5791  * Returns 0 for success, errors for failure.
5792  */
5793 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5794 {
5795         int r, i;
5796
5797         if (rdev->gart.robj == NULL) {
5798                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5799                 return -EINVAL;
5800         }
5801         r = radeon_gart_table_vram_pin(rdev);
5802         if (r)
5803                 return r;
5804         /* Setup TLB control */
5805         WREG32(MC_VM_MX_L1_TLB_CNTL,
5806                (0xA << 7) |
5807                ENABLE_L1_TLB |
5808                ENABLE_L1_FRAGMENT_PROCESSING |
5809                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5810                ENABLE_ADVANCED_DRIVER_MODEL |
5811                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5812         /* Setup L2 cache */
5813         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5814                ENABLE_L2_FRAGMENT_PROCESSING |
5815                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5816                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5817                EFFECTIVE_L2_QUEUE_SIZE(7) |
5818                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5819         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5820         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5821                BANK_SELECT(4) |
5822                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5823         /* setup context0 */
5824         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5825         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5826         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5827         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5828                         (u32)(rdev->dummy_page.addr >> 12));
5829         WREG32(VM_CONTEXT0_CNTL2, 0);
5830         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5831                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5832
5833         WREG32(0x15D4, 0);
5834         WREG32(0x15D8, 0);
5835         WREG32(0x15DC, 0);
5836
5837         /* restore context1-15 */
5838         /* set vm size, must be a multiple of 4 */
5839         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5840         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5841         for (i = 1; i < 16; i++) {
5842                 if (i < 8)
5843                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5844                                rdev->vm_manager.saved_table_addr[i]);
5845                 else
5846                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5847                                rdev->vm_manager.saved_table_addr[i]);
5848         }
5849
5850         /* enable context1-15 */
5851         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5852                (u32)(rdev->dummy_page.addr >> 12));
5853         WREG32(VM_CONTEXT1_CNTL2, 4);
5854         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5855                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5856                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5857                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5858                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5859                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5860                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5861                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5862                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5863                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5864                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5865                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5866                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5867                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5868
5869         if (rdev->family == CHIP_KAVERI) {
5870                 u32 tmp = RREG32(CHUB_CONTROL);
5871                 tmp &= ~BYPASS_VM;
5872                 WREG32(CHUB_CONTROL, tmp);
5873         }
5874
5875         /* XXX SH_MEM regs */
5876         /* where to put LDS, scratch, GPUVM in FSA64 space */
5877         mutex_lock(&rdev->srbm_mutex);
5878         for (i = 0; i < 16; i++) {
5879                 cik_srbm_select(rdev, 0, 0, 0, i);
5880                 /* CP and shaders */
5881                 WREG32(SH_MEM_CONFIG, 0);
5882                 WREG32(SH_MEM_APE1_BASE, 1);
5883                 WREG32(SH_MEM_APE1_LIMIT, 0);
5884                 WREG32(SH_MEM_BASES, 0);
5885                 /* SDMA GFX */
5886                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5887                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5888                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5889                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5890                 /* XXX SDMA RLC - todo */
5891         }
5892         cik_srbm_select(rdev, 0, 0, 0, 0);
5893         mutex_unlock(&rdev->srbm_mutex);
5894
5895         cik_pcie_init_compute_vmid(rdev);
5896
5897         cik_pcie_gart_tlb_flush(rdev);
5898         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5899                  (unsigned)(rdev->mc.gtt_size >> 20),
5900                  (unsigned long long)rdev->gart.table_addr);
5901         rdev->gart.ready = true;
5902         return 0;
5903 }
5904
5905 /**
5906  * cik_pcie_gart_disable - gart disable
5907  *
5908  * @rdev: radeon_device pointer
5909  *
5910  * This disables all VM page tables (CIK).
5911  */
5912 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5913 {
5914         unsigned i;
5915
5916         for (i = 1; i < 16; ++i) {
5917                 uint32_t reg;
5918                 if (i < 8)
5919                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5920                 else
5921                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5922                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5923         }
5924
5925         /* Disable all tables */
5926         WREG32(VM_CONTEXT0_CNTL, 0);
5927         WREG32(VM_CONTEXT1_CNTL, 0);
5928         /* Setup TLB control */
5929         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5930                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5931         /* Setup L2 cache */
5932         WREG32(VM_L2_CNTL,
5933                ENABLE_L2_FRAGMENT_PROCESSING |
5934                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5935                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5936                EFFECTIVE_L2_QUEUE_SIZE(7) |
5937                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5938         WREG32(VM_L2_CNTL2, 0);
5939         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5940                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5941         radeon_gart_table_vram_unpin(rdev);
5942 }
5943
5944 /**
5945  * cik_pcie_gart_fini - vm fini callback
5946  *
5947  * @rdev: radeon_device pointer
5948  *
5949  * Tears down the driver GART/VM setup (CIK).
5950  */
5951 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5952 {
5953         cik_pcie_gart_disable(rdev);
5954         radeon_gart_table_vram_free(rdev);
5955         radeon_gart_fini(rdev);
5956 }
5957
5958 /* vm parser */
5959 /**
5960  * cik_ib_parse - vm ib_parse callback
5961  *
5962  * @rdev: radeon_device pointer
5963  * @ib: indirect buffer pointer
5964  *
5965  * CIK uses hw IB checking so this is a nop (CIK).
5966  */
5967 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5968 {
5969         return 0;
5970 }
5971
5972 /*
5973  * vm
5974  * VMID 0 is the physical GPU addresses as used by the kernel.
5975  * VMIDs 1-15 are used for userspace clients and are handled
5976  * by the radeon vm/hsa code.
5977  */
5978 /**
5979  * cik_vm_init - cik vm init callback
5980  *
5981  * @rdev: radeon_device pointer
5982  *
5983  * Inits cik specific vm parameters (number of VMs, base of vram for
5984  * VMIDs 1-15) (CIK).
5985  * Returns 0 for success.
5986  */
5987 int cik_vm_init(struct radeon_device *rdev)
5988 {
5989         /*
5990          * number of VMs
5991          * VMID 0 is reserved for System
5992          * radeon graphics/compute will use VMIDs 1-7
5993          * amdkfd will use VMIDs 8-15
5994          */
5995         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5996         /* base offset of vram pages */
5997         if (rdev->flags & RADEON_IS_IGP) {
5998                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5999                 tmp <<= 22;
6000                 rdev->vm_manager.vram_base_offset = tmp;
6001         } else
6002                 rdev->vm_manager.vram_base_offset = 0;
6003
6004         return 0;
6005 }
6006
6007 /**
6008  * cik_vm_fini - cik vm fini callback
6009  *
6010  * @rdev: radeon_device pointer
6011  *
6012  * Tear down any asic specific VM setup (CIK).
6013  */
6014 void cik_vm_fini(struct radeon_device *rdev)
6015 {
6016 }
6017
6018 /**
6019  * cik_vm_decode_fault - print human readable fault info
6020  *
6021  * @rdev: radeon_device pointer
6022  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6023  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
6024  *
6025  * Print human readable fault information (CIK).
6026  */
6027 static void cik_vm_decode_fault(struct radeon_device *rdev,
6028                                 u32 status, u32 addr, u32 mc_client)
6029 {
6030         u32 mc_id;
6031         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6032         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6033         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6034                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6035
6036         if (rdev->family == CHIP_HAWAII)
6037                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6038         else
6039                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6040
6041         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6042                protections, vmid, addr,
6043                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6044                block, mc_client, mc_id);
6045 }
6046
6047 /**
6048  * cik_vm_flush - cik vm flush using the CP
6049  *
6050  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: address of the page directory
6051  *
6052  * Update the page table base and flush the VM TLB
6053  * using the CP (CIK).
6054  */
6055 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6056                   unsigned vm_id, uint64_t pd_addr)
6057 {
6058         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6059
6060         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6061         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6062                                  WRITE_DATA_DST_SEL(0)));
6063         if (vm_id < 8) {
6064                 radeon_ring_write(ring,
6065                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6066         } else {
6067                 radeon_ring_write(ring,
6068                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6069         }
6070         radeon_ring_write(ring, 0);
6071         radeon_ring_write(ring, pd_addr >> 12);
6072
6073         /* update SH_MEM_* regs */
6074         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6075         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6076                                  WRITE_DATA_DST_SEL(0)));
6077         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6078         radeon_ring_write(ring, 0);
6079         radeon_ring_write(ring, VMID(vm_id));
6080
6081         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6082         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6083                                  WRITE_DATA_DST_SEL(0)));
6084         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6085         radeon_ring_write(ring, 0);
6086
6087         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6088         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6089         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6090         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6091
6092         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6093         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6094                                  WRITE_DATA_DST_SEL(0)));
6095         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6096         radeon_ring_write(ring, 0);
6097         radeon_ring_write(ring, VMID(0));
6098
6099         /* HDP flush */
6100         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6101
6102         /* bits 0-15 are the VM contexts 0-15 */
6103         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6104         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6105                                  WRITE_DATA_DST_SEL(0)));
6106         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6107         radeon_ring_write(ring, 0);
6108         radeon_ring_write(ring, 1 << vm_id);
6109
6110         /* wait for the invalidate to complete */
6111         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6112         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6113                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6114                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6115         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6116         radeon_ring_write(ring, 0);
6117         radeon_ring_write(ring, 0); /* ref */
6118         radeon_ring_write(ring, 0); /* mask */
6119         radeon_ring_write(ring, 0x20); /* poll interval */
6120
6121         /* compute doesn't have PFP */
6122         if (usepfp) {
6123                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6124                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6125                 radeon_ring_write(ring, 0x0);
6126         }
6127 }
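/*
 * Editor's note: the single-register updates in cik_vm_flush() above all
 * follow the same five-dword PACKET3_WRITE_DATA layout (header, engine/dst
 * select, register dword offset, upper address bits, value).  A minimal
 * sketch of that pattern is shown below for illustration only; the helper
 * is hypothetical and not part of the driver.
 */
static void cik_ring_emit_wreg_example(struct radeon_ring *ring,
				       int usepfp, u32 reg, u32 val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, reg >> 2);	/* register offset in dwords */
	radeon_ring_write(ring, 0);		/* upper bits, unused for registers */
	radeon_ring_write(ring, val);
}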
6128
6129 /*
6130  * RLC
6131  * The RLC is a multi-purpose microengine that handles a
6132  * variety of functions, the most important of which is
6133  * the interrupt controller.
6134  */
6135 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6136                                           bool enable)
6137 {
6138         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6139
6140         if (enable)
6141                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6142         else
6143                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6144         WREG32(CP_INT_CNTL_RING0, tmp);
6145 }
6146
6147 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6148 {
6149         u32 tmp;
6150
6151         tmp = RREG32(RLC_LB_CNTL);
6152         if (enable)
6153                 tmp |= LOAD_BALANCE_ENABLE;
6154         else
6155                 tmp &= ~LOAD_BALANCE_ENABLE;
6156         WREG32(RLC_LB_CNTL, tmp);
6157 }
6158
6159 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6160 {
6161         u32 i, j, k;
6162         u32 mask;
6163
6164         mutex_lock(&rdev->grbm_idx_mutex);
6165         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6166                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6167                         cik_select_se_sh(rdev, i, j);
6168                         for (k = 0; k < rdev->usec_timeout; k++) {
6169                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6170                                         break;
6171                                 udelay(1);
6172                         }
6173                 }
6174         }
6175         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6176         mutex_unlock(&rdev->grbm_idx_mutex);
6177
6178         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6179         for (k = 0; k < rdev->usec_timeout; k++) {
6180                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6181                         break;
6182                 udelay(1);
6183         }
6184 }
6185
6186 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6187 {
6188         u32 tmp;
6189
6190         tmp = RREG32(RLC_CNTL);
6191         if (tmp != rlc)
6192                 WREG32(RLC_CNTL, rlc);
6193 }
6194
6195 static u32 cik_halt_rlc(struct radeon_device *rdev)
6196 {
6197         u32 data, orig;
6198
6199         orig = data = RREG32(RLC_CNTL);
6200
6201         if (data & RLC_ENABLE) {
6202                 u32 i;
6203
6204                 data &= ~RLC_ENABLE;
6205                 WREG32(RLC_CNTL, data);
6206
6207                 for (i = 0; i < rdev->usec_timeout; i++) {
6208                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6209                                 break;
6210                         udelay(1);
6211                 }
6212
6213                 cik_wait_for_rlc_serdes(rdev);
6214         }
6215
6216         return orig;
6217 }
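/*
 * Editor's note: cik_halt_rlc()/cik_update_rlc() bracket the RLC serdes
 * programming sequence that the clockgating paths below (cik_enable_cgcg()
 * and cik_enable_mgcg()) repeat.  A condensed sketch of that shared pattern,
 * with a hypothetical helper name, purely for illustration:
 */
static void cik_serdes_wr_broadcast_example(struct radeon_device *rdev, u32 ctrl)
{
	u32 rlc = cik_halt_rlc(rdev);

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); /* broadcast to all SE/SH */
	WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
	WREG32(RLC_SERDES_WR_CTRL, ctrl);
	mutex_unlock(&rdev->grbm_idx_mutex);

	cik_update_rlc(rdev, rlc);	/* restore the saved RLC_CNTL value */
}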
6218
6219 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6220 {
6221         u32 tmp, i, mask;
6222
6223         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6224         WREG32(RLC_GPR_REG2, tmp);
6225
6226         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6227         for (i = 0; i < rdev->usec_timeout; i++) {
6228                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6229                         break;
6230                 udelay(1);
6231         }
6232
6233         for (i = 0; i < rdev->usec_timeout; i++) {
6234                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6235                         break;
6236                 udelay(1);
6237         }
6238 }
6239
6240 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6241 {
6242         u32 tmp;
6243
6244         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6245         WREG32(RLC_GPR_REG2, tmp);
6246 }
6247
6248 /**
6249  * cik_rlc_stop - stop the RLC ME
6250  *
6251  * @rdev: radeon_device pointer
6252  *
6253  * Halt the RLC ME (MicroEngine) (CIK).
6254  */
6255 static void cik_rlc_stop(struct radeon_device *rdev)
6256 {
6257         WREG32(RLC_CNTL, 0);
6258
6259         cik_enable_gui_idle_interrupt(rdev, false);
6260
6261         cik_wait_for_rlc_serdes(rdev);
6262 }
6263
6264 /**
6265  * cik_rlc_start - start the RLC ME
6266  *
6267  * @rdev: radeon_device pointer
6268  *
6269  * Unhalt the RLC ME (MicroEngine) (CIK).
6270  */
6271 static void cik_rlc_start(struct radeon_device *rdev)
6272 {
6273         WREG32(RLC_CNTL, RLC_ENABLE);
6274
6275         cik_enable_gui_idle_interrupt(rdev, true);
6276
6277         udelay(50);
6278 }
6279
6280 /**
6281  * cik_rlc_resume - setup the RLC hw
6282  *
6283  * @rdev: radeon_device pointer
6284  *
6285  * Initialize the RLC registers, load the ucode,
6286  * and start the RLC (CIK).
6287  * Returns 0 for success, -EINVAL if the ucode is not available.
6288  */
6289 static int cik_rlc_resume(struct radeon_device *rdev)
6290 {
6291         u32 i, size, tmp;
6292
6293         if (!rdev->rlc_fw)
6294                 return -EINVAL;
6295
6296         cik_rlc_stop(rdev);
6297
6298         /* disable CG */
6299         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6300         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6301
6302         si_rlc_reset(rdev);
6303
6304         cik_init_pg(rdev);
6305
6306         cik_init_cg(rdev);
6307
6308         WREG32(RLC_LB_CNTR_INIT, 0);
6309         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6310
6311         mutex_lock(&rdev->grbm_idx_mutex);
6312         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6313         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6314         WREG32(RLC_LB_PARAMS, 0x00600408);
6315         WREG32(RLC_LB_CNTL, 0x80000004);
6316         mutex_unlock(&rdev->grbm_idx_mutex);
6317
6318         WREG32(RLC_MC_CNTL, 0);
6319         WREG32(RLC_UCODE_CNTL, 0);
6320
6321         if (rdev->new_fw) {
6322                 const struct rlc_firmware_header_v1_0 *hdr =
6323                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6324                 const __le32 *fw_data = (const __le32 *)
6325                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6326
6327                 radeon_ucode_print_rlc_hdr(&hdr->header);
6328
6329                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6330                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6331                 for (i = 0; i < size; i++)
6332                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6333                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6334         } else {
6335                 const __be32 *fw_data;
6336
6337                 switch (rdev->family) {
6338                 case CHIP_BONAIRE:
6339                 case CHIP_HAWAII:
6340                 default:
6341                         size = BONAIRE_RLC_UCODE_SIZE;
6342                         break;
6343                 case CHIP_KAVERI:
6344                         size = KV_RLC_UCODE_SIZE;
6345                         break;
6346                 case CHIP_KABINI:
6347                         size = KB_RLC_UCODE_SIZE;
6348                         break;
6349                 case CHIP_MULLINS:
6350                         size = ML_RLC_UCODE_SIZE;
6351                         break;
6352                 }
6353
6354                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6355                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6356                 for (i = 0; i < size; i++)
6357                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6358                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6359         }
6360
6361         /* XXX - find out what chips support lbpw */
6362         cik_enable_lbpw(rdev, false);
6363
6364         if (rdev->family == CHIP_BONAIRE)
6365                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6366
6367         cik_rlc_start(rdev);
6368
6369         return 0;
6370 }
6371
6372 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6373 {
6374         u32 data, orig, tmp, tmp2;
6375
6376         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6377
6378         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6379                 cik_enable_gui_idle_interrupt(rdev, true);
6380
6381                 tmp = cik_halt_rlc(rdev);
6382
6383                 mutex_lock(&rdev->grbm_idx_mutex);
6384                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6385                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6386                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6387                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6388                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6389                 mutex_unlock(&rdev->grbm_idx_mutex);
6390
6391                 cik_update_rlc(rdev, tmp);
6392
6393                 data |= CGCG_EN | CGLS_EN;
6394         } else {
6395                 cik_enable_gui_idle_interrupt(rdev, false);
6396
6397                 RREG32(CB_CGTT_SCLK_CTRL);
6398                 RREG32(CB_CGTT_SCLK_CTRL);
6399                 RREG32(CB_CGTT_SCLK_CTRL);
6400                 RREG32(CB_CGTT_SCLK_CTRL);
6401
6402                 data &= ~(CGCG_EN | CGLS_EN);
6403         }
6404
6405         if (orig != data)
6406                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6407
6408 }
6409
6410 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6411 {
6412         u32 data, orig, tmp = 0;
6413
6414         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6415                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6416                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6417                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6418                                 data |= CP_MEM_LS_EN;
6419                                 if (orig != data)
6420                                         WREG32(CP_MEM_SLP_CNTL, data);
6421                         }
6422                 }
6423
6424                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6425                 data |= 0x00000001;
6426                 data &= 0xfffffffd;
6427                 if (orig != data)
6428                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6429
6430                 tmp = cik_halt_rlc(rdev);
6431
6432                 mutex_lock(&rdev->grbm_idx_mutex);
6433                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6434                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6435                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6436                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6437                 WREG32(RLC_SERDES_WR_CTRL, data);
6438                 mutex_unlock(&rdev->grbm_idx_mutex);
6439
6440                 cik_update_rlc(rdev, tmp);
6441
6442                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6443                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6444                         data &= ~SM_MODE_MASK;
6445                         data |= SM_MODE(0x2);
6446                         data |= SM_MODE_ENABLE;
6447                         data &= ~CGTS_OVERRIDE;
6448                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6449                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6450                                 data &= ~CGTS_LS_OVERRIDE;
6451                         data &= ~ON_MONITOR_ADD_MASK;
6452                         data |= ON_MONITOR_ADD_EN;
6453                         data |= ON_MONITOR_ADD(0x96);
6454                         if (orig != data)
6455                                 WREG32(CGTS_SM_CTRL_REG, data);
6456                 }
6457         } else {
6458                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6459                 data |= 0x00000003;
6460                 if (orig != data)
6461                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6462
6463                 data = RREG32(RLC_MEM_SLP_CNTL);
6464                 if (data & RLC_MEM_LS_EN) {
6465                         data &= ~RLC_MEM_LS_EN;
6466                         WREG32(RLC_MEM_SLP_CNTL, data);
6467                 }
6468
6469                 data = RREG32(CP_MEM_SLP_CNTL);
6470                 if (data & CP_MEM_LS_EN) {
6471                         data &= ~CP_MEM_LS_EN;
6472                         WREG32(CP_MEM_SLP_CNTL, data);
6473                 }
6474
6475                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6476                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6477                 if (orig != data)
6478                         WREG32(CGTS_SM_CTRL_REG, data);
6479
6480                 tmp = cik_halt_rlc(rdev);
6481
6482                 mutex_lock(&rdev->grbm_idx_mutex);
6483                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6484                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6485                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6486                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6487                 WREG32(RLC_SERDES_WR_CTRL, data);
6488                 mutex_unlock(&rdev->grbm_idx_mutex);
6489
6490                 cik_update_rlc(rdev, tmp);
6491         }
6492 }
6493
6494 static const u32 mc_cg_registers[] =
6495 {
6496         MC_HUB_MISC_HUB_CG,
6497         MC_HUB_MISC_SIP_CG,
6498         MC_HUB_MISC_VM_CG,
6499         MC_XPB_CLK_GAT,
6500         ATC_MISC_CG,
6501         MC_CITF_MISC_WR_CG,
6502         MC_CITF_MISC_RD_CG,
6503         MC_CITF_MISC_VM_CG,
6504         VM_L2_CG,
6505 };
6506
6507 static void cik_enable_mc_ls(struct radeon_device *rdev,
6508                              bool enable)
6509 {
6510         int i;
6511         u32 orig, data;
6512
6513         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6514                 orig = data = RREG32(mc_cg_registers[i]);
6515                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6516                         data |= MC_LS_ENABLE;
6517                 else
6518                         data &= ~MC_LS_ENABLE;
6519                 if (data != orig)
6520                         WREG32(mc_cg_registers[i], data);
6521         }
6522 }
6523
6524 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6525                                bool enable)
6526 {
6527         int i;
6528         u32 orig, data;
6529
6530         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6531                 orig = data = RREG32(mc_cg_registers[i]);
6532                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6533                         data |= MC_CG_ENABLE;
6534                 else
6535                         data &= ~MC_CG_ENABLE;
6536                 if (data != orig)
6537                         WREG32(mc_cg_registers[i], data);
6538         }
6539 }
6540
6541 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6542                                  bool enable)
6543 {
6544         u32 orig, data;
6545
6546         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6547                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6548                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6549         } else {
6550                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6551                 data |= 0xff000000;
6552                 if (data != orig)
6553                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6554
6555                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6556                 data |= 0xff000000;
6557                 if (data != orig)
6558                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6559         }
6560 }
6561
6562 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6563                                  bool enable)
6564 {
6565         u32 orig, data;
6566
6567         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6568                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6569                 data |= 0x100;
6570                 if (orig != data)
6571                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6572
6573                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6574                 data |= 0x100;
6575                 if (orig != data)
6576                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6577         } else {
6578                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6579                 data &= ~0x100;
6580                 if (orig != data)
6581                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6582
6583                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6584                 data &= ~0x100;
6585                 if (orig != data)
6586                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6587         }
6588 }
6589
6590 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6591                                 bool enable)
6592 {
6593         u32 orig, data;
6594
6595         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6596                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6597                 data = 0xfff;
6598                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6599
6600                 orig = data = RREG32(UVD_CGC_CTRL);
6601                 data |= DCM;
6602                 if (orig != data)
6603                         WREG32(UVD_CGC_CTRL, data);
6604         } else {
6605                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6606                 data &= ~0xfff;
6607                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6608
6609                 orig = data = RREG32(UVD_CGC_CTRL);
6610                 data &= ~DCM;
6611                 if (orig != data)
6612                         WREG32(UVD_CGC_CTRL, data);
6613         }
6614 }
6615
6616 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6617                                bool enable)
6618 {
6619         u32 orig, data;
6620
6621         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6622
6623         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6624                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6625                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6626         else
6627                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6628                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6629
6630         if (orig != data)
6631                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6632 }
6633
6634 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6635                                 bool enable)
6636 {
6637         u32 orig, data;
6638
6639         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6640
6641         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6642                 data &= ~CLOCK_GATING_DIS;
6643         else
6644                 data |= CLOCK_GATING_DIS;
6645
6646         if (orig != data)
6647                 WREG32(HDP_HOST_PATH_CNTL, data);
6648 }
6649
6650 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6651                               bool enable)
6652 {
6653         u32 orig, data;
6654
6655         orig = data = RREG32(HDP_MEM_POWER_LS);
6656
6657         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6658                 data |= HDP_LS_ENABLE;
6659         else
6660                 data &= ~HDP_LS_ENABLE;
6661
6662         if (orig != data)
6663                 WREG32(HDP_MEM_POWER_LS, data);
6664 }
6665
6666 void cik_update_cg(struct radeon_device *rdev,
6667                    u32 block, bool enable)
6668 {
6669
6670         if (block & RADEON_CG_BLOCK_GFX) {
6671                 cik_enable_gui_idle_interrupt(rdev, false);
6672                 /* order matters! */
6673                 if (enable) {
6674                         cik_enable_mgcg(rdev, true);
6675                         cik_enable_cgcg(rdev, true);
6676                 } else {
6677                         cik_enable_cgcg(rdev, false);
6678                         cik_enable_mgcg(rdev, false);
6679                 }
6680                 cik_enable_gui_idle_interrupt(rdev, true);
6681         }
6682
6683         if (block & RADEON_CG_BLOCK_MC) {
6684                 if (!(rdev->flags & RADEON_IS_IGP)) {
6685                         cik_enable_mc_mgcg(rdev, enable);
6686                         cik_enable_mc_ls(rdev, enable);
6687                 }
6688         }
6689
6690         if (block & RADEON_CG_BLOCK_SDMA) {
6691                 cik_enable_sdma_mgcg(rdev, enable);
6692                 cik_enable_sdma_mgls(rdev, enable);
6693         }
6694
6695         if (block & RADEON_CG_BLOCK_BIF) {
6696                 cik_enable_bif_mgls(rdev, enable);
6697         }
6698
6699         if (block & RADEON_CG_BLOCK_UVD) {
6700                 if (rdev->has_uvd)
6701                         cik_enable_uvd_mgcg(rdev, enable);
6702         }
6703
6704         if (block & RADEON_CG_BLOCK_HDP) {
6705                 cik_enable_hdp_mgcg(rdev, enable);
6706                 cik_enable_hdp_ls(rdev, enable);
6707         }
6708
6709         if (block & RADEON_CG_BLOCK_VCE) {
6710                 vce_v2_0_enable_mgcg(rdev, enable);
6711         }
6712 }
6713
6714 static void cik_init_cg(struct radeon_device *rdev)
6715 {
6716
6717         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6718
6719         if (rdev->has_uvd)
6720                 si_init_uvd_internal_cg(rdev);
6721
6722         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6723                              RADEON_CG_BLOCK_SDMA |
6724                              RADEON_CG_BLOCK_BIF |
6725                              RADEON_CG_BLOCK_UVD |
6726                              RADEON_CG_BLOCK_HDP), true);
6727 }
6728
6729 static void cik_fini_cg(struct radeon_device *rdev)
6730 {
6731         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6732                              RADEON_CG_BLOCK_SDMA |
6733                              RADEON_CG_BLOCK_BIF |
6734                              RADEON_CG_BLOCK_UVD |
6735                              RADEON_CG_BLOCK_HDP), false);
6736
6737         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6738 }
6739
6740 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6741                                           bool enable)
6742 {
6743         u32 data, orig;
6744
6745         orig = data = RREG32(RLC_PG_CNTL);
6746         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6747                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6748         else
6749                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6750         if (orig != data)
6751                 WREG32(RLC_PG_CNTL, data);
6752 }
6753
6754 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6755                                           bool enable)
6756 {
6757         u32 data, orig;
6758
6759         orig = data = RREG32(RLC_PG_CNTL);
6760         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6761                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6762         else
6763                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6764         if (orig != data)
6765                 WREG32(RLC_PG_CNTL, data);
6766 }
6767
6768 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6769 {
6770         u32 data, orig;
6771
6772         orig = data = RREG32(RLC_PG_CNTL);
6773         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6774                 data &= ~DISABLE_CP_PG;
6775         else
6776                 data |= DISABLE_CP_PG;
6777         if (orig != data)
6778                 WREG32(RLC_PG_CNTL, data);
6779 }
6780
6781 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6782 {
6783         u32 data, orig;
6784
6785         orig = data = RREG32(RLC_PG_CNTL);
6786         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6787                 data &= ~DISABLE_GDS_PG;
6788         else
6789                 data |= DISABLE_GDS_PG;
6790         if (orig != data)
6791                 WREG32(RLC_PG_CNTL, data);
6792 }
6793
6794 #define CP_ME_TABLE_SIZE    96
6795 #define CP_ME_TABLE_OFFSET  2048
6796 #define CP_MEC_TABLE_OFFSET 4096
6797
6798 void cik_init_cp_pg_table(struct radeon_device *rdev)
6799 {
6800         volatile u32 *dst_ptr;
6801         int me, i, max_me = 4;
6802         u32 bo_offset = 0;
6803         u32 table_offset, table_size;
6804
6805         if (rdev->family == CHIP_KAVERI)
6806                 max_me = 5;
6807
6808         if (rdev->rlc.cp_table_ptr == NULL)
6809                 return;
6810
6811         /* write the cp table buffer */
6812         dst_ptr = rdev->rlc.cp_table_ptr;
6813         for (me = 0; me < max_me; me++) {
6814                 if (rdev->new_fw) {
6815                         const __le32 *fw_data;
6816                         const struct gfx_firmware_header_v1_0 *hdr;
6817
6818                         if (me == 0) {
6819                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6820                                 fw_data = (const __le32 *)
6821                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6822                                 table_offset = le32_to_cpu(hdr->jt_offset);
6823                                 table_size = le32_to_cpu(hdr->jt_size);
6824                         } else if (me == 1) {
6825                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6826                                 fw_data = (const __le32 *)
6827                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6828                                 table_offset = le32_to_cpu(hdr->jt_offset);
6829                                 table_size = le32_to_cpu(hdr->jt_size);
6830                         } else if (me == 2) {
6831                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6832                                 fw_data = (const __le32 *)
6833                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6834                                 table_offset = le32_to_cpu(hdr->jt_offset);
6835                                 table_size = le32_to_cpu(hdr->jt_size);
6836                         } else if (me == 3) {
6837                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6838                                 fw_data = (const __le32 *)
6839                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6840                                 table_offset = le32_to_cpu(hdr->jt_offset);
6841                                 table_size = le32_to_cpu(hdr->jt_size);
6842                         } else {
6843                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6844                                 fw_data = (const __le32 *)
6845                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6846                                 table_offset = le32_to_cpu(hdr->jt_offset);
6847                                 table_size = le32_to_cpu(hdr->jt_size);
6848                         }
6849
6850                         for (i = 0; i < table_size; i ++) {
6851                                 dst_ptr[bo_offset + i] =
6852                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6853                         }
6854                         bo_offset += table_size;
6855                 } else {
6856                         const __be32 *fw_data;
6857                         table_size = CP_ME_TABLE_SIZE;
6858
6859                         if (me == 0) {
6860                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6861                                 table_offset = CP_ME_TABLE_OFFSET;
6862                         } else if (me == 1) {
6863                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6864                                 table_offset = CP_ME_TABLE_OFFSET;
6865                         } else if (me == 2) {
6866                                 fw_data = (const __be32 *)rdev->me_fw->data;
6867                                 table_offset = CP_ME_TABLE_OFFSET;
6868                         } else {
6869                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6870                                 table_offset = CP_MEC_TABLE_OFFSET;
6871                         }
6872
6873                         for (i = 0; i < table_size; i ++) {
6874                                 dst_ptr[bo_offset + i] =
6875                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6876                         }
6877                         bo_offset += table_size;
6878                 }
6879         }
6880 }
6881
6882 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6883                                 bool enable)
6884 {
6885         u32 data, orig;
6886
6887         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6888                 orig = data = RREG32(RLC_PG_CNTL);
6889                 data |= GFX_PG_ENABLE;
6890                 if (orig != data)
6891                         WREG32(RLC_PG_CNTL, data);
6892
6893                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6894                 data |= AUTO_PG_EN;
6895                 if (orig != data)
6896                         WREG32(RLC_AUTO_PG_CTRL, data);
6897         } else {
6898                 orig = data = RREG32(RLC_PG_CNTL);
6899                 data &= ~GFX_PG_ENABLE;
6900                 if (orig != data)
6901                         WREG32(RLC_PG_CNTL, data);
6902
6903                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6904                 data &= ~AUTO_PG_EN;
6905                 if (orig != data)
6906                         WREG32(RLC_AUTO_PG_CTRL, data);
6907
6908                 data = RREG32(DB_RENDER_CONTROL);
6909         }
6910 }
6911
6912 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6913 {
6914         u32 mask = 0, tmp, tmp1;
6915         int i;
6916
6917         mutex_lock(&rdev->grbm_idx_mutex);
6918         cik_select_se_sh(rdev, se, sh);
6919         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6920         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6921         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6922         mutex_unlock(&rdev->grbm_idx_mutex);
6923
6924         tmp &= 0xffff0000;
6925
6926         tmp |= tmp1;
6927         tmp >>= 16;
6928
6929         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6930                 mask <<= 1;
6931                 mask |= 1;
6932         }
6933
6934         return (~tmp) & mask;
6935 }
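/*
 * Editor's worked example (assumed values, illustration only): with
 * max_cu_per_sh = 10 the loop above builds mask = 0x3ff.  If the combined
 * disable bits from CC_GC_SHADER_ARRAY_CONFIG/GC_USER_SHADER_ARRAY_CONFIG
 * come out as 0x003 after the shift, the function returns
 * (~0x003) & 0x3ff = 0x3fc, i.e. CUs 2-9 are active and CUs 0-1 are harvested.
 */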
6936
6937 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6938 {
6939         u32 i, j, k, active_cu_number = 0;
6940         u32 mask, counter, cu_bitmap;
6941         u32 tmp = 0;
6942
6943         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6944                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6945                         mask = 1;
6946                         cu_bitmap = 0;
6947                         counter = 0;
6948                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6949                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6950                                         if (counter < 2)
6951                                                 cu_bitmap |= mask;
6952                                         counter ++;
6953                                 }
6954                                 mask <<= 1;
6955                         }
6956
6957                         active_cu_number += counter;
6958                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6959                 }
6960         }
6961
6962         WREG32(RLC_PG_AO_CU_MASK, tmp);
6963
6964         tmp = RREG32(RLC_MAX_PG_CU);
6965         tmp &= ~MAX_PU_CU_MASK;
6966         tmp |= MAX_PU_CU(active_cu_number);
6967         WREG32(RLC_MAX_PG_CU, tmp);
6968 }
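/*
 * Editor's note on the packing above (assumed example): each shader array
 * gets an 8-bit slot in RLC_PG_AO_CU_MASK at bit position (se * 16 + sh * 8),
 * so an always-on bitmap of 0x3 for SE1/SH0 lands at bits 16-17
 * (0x3 << 16 = 0x30000).
 */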
6969
6970 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6971                                        bool enable)
6972 {
6973         u32 data, orig;
6974
6975         orig = data = RREG32(RLC_PG_CNTL);
6976         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6977                 data |= STATIC_PER_CU_PG_ENABLE;
6978         else
6979                 data &= ~STATIC_PER_CU_PG_ENABLE;
6980         if (orig != data)
6981                 WREG32(RLC_PG_CNTL, data);
6982 }
6983
6984 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6985                                         bool enable)
6986 {
6987         u32 data, orig;
6988
6989         orig = data = RREG32(RLC_PG_CNTL);
6990         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6991                 data |= DYN_PER_CU_PG_ENABLE;
6992         else
6993                 data &= ~DYN_PER_CU_PG_ENABLE;
6994         if (orig != data)
6995                 WREG32(RLC_PG_CNTL, data);
6996 }
6997
6998 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6999 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7000
7001 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7002 {
7003         u32 data, orig;
7004         u32 i;
7005
7006         if (rdev->rlc.cs_data) {
7007                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7008                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7009                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7010                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7011         } else {
7012                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7013                 for (i = 0; i < 3; i++)
7014                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
7015         }
7016         if (rdev->rlc.reg_list) {
7017                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7018                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
7019                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7020         }
7021
7022         orig = data = RREG32(RLC_PG_CNTL);
7023         data |= GFX_PG_SRC;
7024         if (orig != data)
7025                 WREG32(RLC_PG_CNTL, data);
7026
7027         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7028         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7029
7030         data = RREG32(CP_RB_WPTR_POLL_CNTL);
7031         data &= ~IDLE_POLL_COUNT_MASK;
7032         data |= IDLE_POLL_COUNT(0x60);
7033         WREG32(CP_RB_WPTR_POLL_CNTL, data);
7034
7035         data = 0x10101010;
7036         WREG32(RLC_PG_DELAY, data);
7037
7038         data = RREG32(RLC_PG_DELAY_2);
7039         data &= ~0xff;
7040         data |= 0x3;
7041         WREG32(RLC_PG_DELAY_2, data);
7042
7043         data = RREG32(RLC_AUTO_PG_CTRL);
7044         data &= ~GRBM_REG_SGIT_MASK;
7045         data |= GRBM_REG_SGIT(0x700);
7046         WREG32(RLC_AUTO_PG_CTRL, data);
7047
7048 }
7049
7050 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7051 {
7052         cik_enable_gfx_cgpg(rdev, enable);
7053         cik_enable_gfx_static_mgpg(rdev, enable);
7054         cik_enable_gfx_dynamic_mgpg(rdev, enable);
7055 }
7056
7057 u32 cik_get_csb_size(struct radeon_device *rdev)
7058 {
7059         u32 count = 0;
7060         const struct cs_section_def *sect = NULL;
7061         const struct cs_extent_def *ext = NULL;
7062
7063         if (rdev->rlc.cs_data == NULL)
7064                 return 0;
7065
7066         /* begin clear state */
7067         count += 2;
7068         /* context control state */
7069         count += 3;
7070
7071         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7072                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7073                         if (sect->id == SECT_CONTEXT)
7074                                 count += 2 + ext->reg_count;
7075                         else
7076                                 return 0;
7077                 }
7078         }
7079         /* pa_sc_raster_config/pa_sc_raster_config1 */
7080         count += 4;
7081         /* end clear state */
7082         count += 2;
7083         /* clear state */
7084         count += 2;
7085
7086         return count;
7087 }
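/*
 * Editor's worked example (hypothetical cs_data, illustration only): a clear
 * state image with a single SECT_CONTEXT extent of 8 registers sizes to
 * 2 + 3 + (2 + 8) + 4 + 2 + 2 = 23 dwords, matching the packet stream
 * emitted by cik_get_csb_buffer() below.
 */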
7088
7089 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7090 {
7091         u32 count = 0, i;
7092         const struct cs_section_def *sect = NULL;
7093         const struct cs_extent_def *ext = NULL;
7094
7095         if (rdev->rlc.cs_data == NULL)
7096                 return;
7097         if (buffer == NULL)
7098                 return;
7099
7100         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7101         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7102
7103         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7104         buffer[count++] = cpu_to_le32(0x80000000);
7105         buffer[count++] = cpu_to_le32(0x80000000);
7106
7107         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7108                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7109                         if (sect->id == SECT_CONTEXT) {
7110                                 buffer[count++] =
7111                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7112                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7113                                 for (i = 0; i < ext->reg_count; i++)
7114                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7115                         } else {
7116                                 return;
7117                         }
7118                 }
7119         }
7120
7121         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7122         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7123         switch (rdev->family) {
7124         case CHIP_BONAIRE:
7125                 buffer[count++] = cpu_to_le32(0x16000012);
7126                 buffer[count++] = cpu_to_le32(0x00000000);
7127                 break;
7128         case CHIP_KAVERI:
7129                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7130                 buffer[count++] = cpu_to_le32(0x00000000);
7131                 break;
7132         case CHIP_KABINI:
7133         case CHIP_MULLINS:
7134                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7135                 buffer[count++] = cpu_to_le32(0x00000000);
7136                 break;
7137         case CHIP_HAWAII:
7138                 buffer[count++] = cpu_to_le32(0x3a00161a);
7139                 buffer[count++] = cpu_to_le32(0x0000002e);
7140                 break;
7141         default:
7142                 buffer[count++] = cpu_to_le32(0x00000000);
7143                 buffer[count++] = cpu_to_le32(0x00000000);
7144                 break;
7145         }
7146
7147         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7148         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7149
7150         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7151         buffer[count++] = cpu_to_le32(0);
7152 }
7153
7154 static void cik_init_pg(struct radeon_device *rdev)
7155 {
7156         if (rdev->pg_flags) {
7157                 cik_enable_sck_slowdown_on_pu(rdev, true);
7158                 cik_enable_sck_slowdown_on_pd(rdev, true);
7159                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7160                         cik_init_gfx_cgpg(rdev);
7161                         cik_enable_cp_pg(rdev, true);
7162                         cik_enable_gds_pg(rdev, true);
7163                 }
7164                 cik_init_ao_cu_mask(rdev);
7165                 cik_update_gfx_pg(rdev, true);
7166         }
7167 }
7168
7169 static void cik_fini_pg(struct radeon_device *rdev)
7170 {
7171         if (rdev->pg_flags) {
7172                 cik_update_gfx_pg(rdev, false);
7173                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7174                         cik_enable_cp_pg(rdev, false);
7175                         cik_enable_gds_pg(rdev, false);
7176                 }
7177         }
7178 }
7179
7180 /*
7181  * Interrupts
7182  * Starting with r6xx, interrupts are handled via a ring buffer.
7183  * Ring buffers are areas of GPU accessible memory that the GPU
7184  * writes interrupt vectors into and the host reads vectors out of.
7185  * There is a rptr (read pointer) that determines where the
7186  * host is currently reading, and a wptr (write pointer)
7187  * which determines where the GPU has written.  When the
7188  * pointers are equal, the ring is idle.  When the GPU
7189  * writes vectors to the ring buffer, it increments the
7190  * wptr.  When there is an interrupt, the host then starts
7191  * fetching vectors and processing them until the pointers are
7192  * equal again at which point it updates the rptr.
7193  */
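/*
 * Editor's note: a minimal sketch (not the driver's actual interrupt
 * processing path) of how the host side consumes the IH ring described
 * above.  Each vector occupies 16 bytes and rptr/wptr are byte offsets.
 * The helper name is hypothetical and the body is illustrative only.
 */
static void cik_ih_drain_example(struct radeon_device *rdev, u32 wptr)
{
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		u32 ring_index = rptr / 4;
		u32 src_id   = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		u32 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;

		(void)src_id;	/* a real handler would dispatch on these */
		(void)src_data;

		rptr += 16;			/* advance one 4-dword vector */
		rptr &= rdev->ih.ptr_mask;	/* wrap at the ring size */
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);
}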
7194
7195 /**
7196  * cik_enable_interrupts - Enable the interrupt ring buffer
7197  *
7198  * @rdev: radeon_device pointer
7199  *
7200  * Enable the interrupt ring buffer (CIK).
7201  */
7202 static void cik_enable_interrupts(struct radeon_device *rdev)
7203 {
7204         u32 ih_cntl = RREG32(IH_CNTL);
7205         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7206
7207         ih_cntl |= ENABLE_INTR;
7208         ih_rb_cntl |= IH_RB_ENABLE;
7209         WREG32(IH_CNTL, ih_cntl);
7210         WREG32(IH_RB_CNTL, ih_rb_cntl);
7211         rdev->ih.enabled = true;
7212 }
7213
7214 /**
7215  * cik_disable_interrupts - Disable the interrupt ring buffer
7216  *
7217  * @rdev: radeon_device pointer
7218  *
7219  * Disable the interrupt ring buffer (CIK).
7220  */
7221 static void cik_disable_interrupts(struct radeon_device *rdev)
7222 {
7223         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7224         u32 ih_cntl = RREG32(IH_CNTL);
7225
7226         ih_rb_cntl &= ~IH_RB_ENABLE;
7227         ih_cntl &= ~ENABLE_INTR;
7228         WREG32(IH_RB_CNTL, ih_rb_cntl);
7229         WREG32(IH_CNTL, ih_cntl);
7230         /* set rptr, wptr to 0 */
7231         WREG32(IH_RB_RPTR, 0);
7232         WREG32(IH_RB_WPTR, 0);
7233         rdev->ih.enabled = false;
7234         rdev->ih.rptr = 0;
7235 }
7236
7237 /**
7238  * cik_disable_interrupt_state - Disable all interrupt sources
7239  *
7240  * @rdev: radeon_device pointer
7241  *
7242  * Clear all interrupt enable bits used by the driver (CIK).
7243  */
7244 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7245 {
7246         u32 tmp;
7247
7248         /* gfx ring */
7249         tmp = RREG32(CP_INT_CNTL_RING0) &
7250                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7251         WREG32(CP_INT_CNTL_RING0, tmp);
7252         /* sdma */
7253         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7254         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7255         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7256         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7257         /* compute queues */
7258         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7259         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7260         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7261         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7262         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7263         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7264         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7265         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7266         /* grbm */
7267         WREG32(GRBM_INT_CNTL, 0);
7268         /* SRBM */
7269         WREG32(SRBM_INT_CNTL, 0);
7270         /* vline/vblank, etc. */
7271         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7272         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7273         if (rdev->num_crtc >= 4) {
7274                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7275                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7276         }
7277         if (rdev->num_crtc >= 6) {
7278                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7279                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7280         }
7281         /* pflip */
7282         if (rdev->num_crtc >= 2) {
7283                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7284                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7285         }
7286         if (rdev->num_crtc >= 4) {
7287                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7288                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7289         }
7290         if (rdev->num_crtc >= 6) {
7291                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7292                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7293         }
7294
7295         /* dac hotplug */
7296         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7297
7298         /* digital hotplug */
7299         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7300         WREG32(DC_HPD1_INT_CONTROL, tmp);
7301         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7302         WREG32(DC_HPD2_INT_CONTROL, tmp);
7303         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7304         WREG32(DC_HPD3_INT_CONTROL, tmp);
7305         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7306         WREG32(DC_HPD4_INT_CONTROL, tmp);
7307         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7308         WREG32(DC_HPD5_INT_CONTROL, tmp);
7309         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7310         WREG32(DC_HPD6_INT_CONTROL, tmp);
7311
7312 }
7313
7314 /**
7315  * cik_irq_init - init and enable the interrupt ring
7316  *
7317  * @rdev: radeon_device pointer
7318  *
7319  * Allocate a ring buffer for the interrupt controller,
7320  * enable the RLC, disable interrupts, then set up and
7321  * enable the IH ring buffer (CIK).
7322  * Called at device load and resume.
7323  * Returns 0 for success, errors for failure.
7324  */
7325 static int cik_irq_init(struct radeon_device *rdev)
7326 {
7327         int ret = 0;
7328         int rb_bufsz;
7329         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7330
7331         /* allocate ring */
7332         ret = r600_ih_ring_alloc(rdev);
7333         if (ret)
7334                 return ret;
7335
7336         /* disable irqs */
7337         cik_disable_interrupts(rdev);
7338
7339         /* init rlc */
7340         ret = cik_rlc_resume(rdev);
7341         if (ret) {
7342                 r600_ih_ring_fini(rdev);
7343                 return ret;
7344         }
7345
7346         /* setup interrupt control */
7347         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7348         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7349         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7350         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7351          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7352          */
7353         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7354         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7355         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7356         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7357
7358         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7359         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7360
7361         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7362                       IH_WPTR_OVERFLOW_CLEAR |
7363                       (rb_bufsz << 1));
7364
7365         if (rdev->wb.enabled)
7366                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7367
7368         /* set the writeback address whether it's enabled or not */
7369         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7370         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7371
7372         WREG32(IH_RB_CNTL, ih_rb_cntl);
7373
7374         /* set rptr, wptr to 0 */
7375         WREG32(IH_RB_RPTR, 0);
7376         WREG32(IH_RB_WPTR, 0);
7377
7378         /* Default settings for IH_CNTL (disabled at first) */
7379         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7380         /* RPTR_REARM only works if msi's are enabled */
7381         if (rdev->msi_enabled)
7382                 ih_cntl |= RPTR_REARM;
7383         WREG32(IH_CNTL, ih_cntl);
7384
7385         /* force the active interrupt state to all disabled */
7386         cik_disable_interrupt_state(rdev);
7387
7388         pci_set_master(rdev->pdev);
7389
7390         /* enable irqs */
7391         cik_enable_interrupts(rdev);
7392
7393         return ret;
7394 }
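/*
 * Editor's note on the ring sizing above (example values assumed): the IH
 * ring size is a power of two in bytes and IH_RB_CNTL takes its log2 in
 * dwords starting at bit 1.  For a 64 KiB ring, ring_size / 4 = 16384
 * dwords and order_base_2(16384) = 14, so (rb_bufsz << 1) programs 14 into
 * that field.
 */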
7395
7396 /**
7397  * cik_irq_set - enable/disable interrupt sources
7398  *
7399  * @rdev: radeon_device pointer
7400  *
7401  * Enable interrupt sources on the GPU (vblanks, hpd,
7402  * etc.) (CIK).
7403  * Returns 0 for success, errors for failure.
7404  */
7405 int cik_irq_set(struct radeon_device *rdev)
7406 {
7407         u32 cp_int_cntl;
7408         u32 cp_m1p0;
7409         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7410         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7411         u32 grbm_int_cntl = 0;
7412         u32 dma_cntl, dma_cntl1;
7413
7414         if (!rdev->irq.installed) {
7415                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7416                 return -EINVAL;
7417         }
7418         /* don't enable anything if the ih is disabled */
7419         if (!rdev->ih.enabled) {
7420                 cik_disable_interrupts(rdev);
7421                 /* force the active interrupt state to all disabled */
7422                 cik_disable_interrupt_state(rdev);
7423                 return 0;
7424         }
7425
7426         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7427                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7428         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7429
7430         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7431         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7432         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7433         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7434         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7435         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7436
7437         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7438         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7439
7440         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7441
7442         /* enable CP interrupts on all rings */
7443         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7444                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7445                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7446         }
7447         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7448                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7449                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7450                 if (ring->me == 1) {
7451                         switch (ring->pipe) {
7452                         case 0:
7453                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7454                                 break;
7455                         default:
7456                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7457                                 break;
7458                         }
7459                 } else {
7460                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7461                 }
7462         }
7463         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7464                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7465                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7466                 if (ring->me == 1) {
7467                         switch (ring->pipe) {
7468                         case 0:
7469                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7470                                 break;
7471                         default:
7472                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7473                                 break;
7474                         }
7475                 } else {
7476                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7477                 }
7478         }
7479
7480         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7481                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7482                 dma_cntl |= TRAP_ENABLE;
7483         }
7484
7485         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7486                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7487                 dma_cntl1 |= TRAP_ENABLE;
7488         }
7489
7490         if (rdev->irq.crtc_vblank_int[0] ||
7491             atomic_read(&rdev->irq.pflip[0])) {
7492                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7493                 crtc1 |= VBLANK_INTERRUPT_MASK;
7494         }
7495         if (rdev->irq.crtc_vblank_int[1] ||
7496             atomic_read(&rdev->irq.pflip[1])) {
7497                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7498                 crtc2 |= VBLANK_INTERRUPT_MASK;
7499         }
7500         if (rdev->irq.crtc_vblank_int[2] ||
7501             atomic_read(&rdev->irq.pflip[2])) {
7502                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7503                 crtc3 |= VBLANK_INTERRUPT_MASK;
7504         }
7505         if (rdev->irq.crtc_vblank_int[3] ||
7506             atomic_read(&rdev->irq.pflip[3])) {
7507                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7508                 crtc4 |= VBLANK_INTERRUPT_MASK;
7509         }
7510         if (rdev->irq.crtc_vblank_int[4] ||
7511             atomic_read(&rdev->irq.pflip[4])) {
7512                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7513                 crtc5 |= VBLANK_INTERRUPT_MASK;
7514         }
7515         if (rdev->irq.crtc_vblank_int[5] ||
7516             atomic_read(&rdev->irq.pflip[5])) {
7517                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7518                 crtc6 |= VBLANK_INTERRUPT_MASK;
7519         }
7520         if (rdev->irq.hpd[0]) {
7521                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7522                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7523         }
7524         if (rdev->irq.hpd[1]) {
7525                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7526                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7527         }
7528         if (rdev->irq.hpd[2]) {
7529                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7530                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7531         }
7532         if (rdev->irq.hpd[3]) {
7533                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7534                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7535         }
7536         if (rdev->irq.hpd[4]) {
7537                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7538                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7539         }
7540         if (rdev->irq.hpd[5]) {
7541                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7542                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7543         }
7544
7545         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7546
7547         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7548         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7549
7550         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7551
7552         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7553
7554         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7555         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7556         if (rdev->num_crtc >= 4) {
7557                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7558                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7559         }
7560         if (rdev->num_crtc >= 6) {
7561                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7562                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7563         }
7564
7565         if (rdev->num_crtc >= 2) {
7566                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7567                        GRPH_PFLIP_INT_MASK);
7568                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7569                        GRPH_PFLIP_INT_MASK);
7570         }
7571         if (rdev->num_crtc >= 4) {
7572                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7573                        GRPH_PFLIP_INT_MASK);
7574                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7575                        GRPH_PFLIP_INT_MASK);
7576         }
7577         if (rdev->num_crtc >= 6) {
7578                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7579                        GRPH_PFLIP_INT_MASK);
7580                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7581                        GRPH_PFLIP_INT_MASK);
7582         }
7583
7584         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7585         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7586         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7587         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7588         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7589         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7590
7591         /* posting read */
7592         RREG32(SRBM_STATUS);
7593
7594         return 0;
7595 }
7596
7597 /**
7598  * cik_irq_ack - ack interrupt sources
7599  *
7600  * @rdev: radeon_device pointer
7601  *
7602  * Ack interrupt sources on the GPU (vblanks, hpd,
7603  * etc.) (CIK).  Certain interrupt sources are sw
7604  * generated and do not require an explicit ack.
7605  */
7606 static inline void cik_irq_ack(struct radeon_device *rdev)
7607 {
7608         u32 tmp;
7609
7610         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7611         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7612         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7613         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7614         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7615         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7616         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7617
7618         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7619                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7620         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7621                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7622         if (rdev->num_crtc >= 4) {
7623                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7624                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7625                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7626                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7627         }
7628         if (rdev->num_crtc >= 6) {
7629                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7630                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7631                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7632                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7633         }
7634
7635         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7636                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7637                        GRPH_PFLIP_INT_CLEAR);
7638         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7639                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7640                        GRPH_PFLIP_INT_CLEAR);
7641         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7642                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7643         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7644                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7645         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7646                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7647         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7648                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7649
7650         if (rdev->num_crtc >= 4) {
7651                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7652                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7653                                GRPH_PFLIP_INT_CLEAR);
7654                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7655                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7656                                GRPH_PFLIP_INT_CLEAR);
7657                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7658                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7659                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7660                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7661                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7662                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7663                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7664                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7665         }
7666
7667         if (rdev->num_crtc >= 6) {
7668                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7669                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7670                                GRPH_PFLIP_INT_CLEAR);
7671                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7672                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7673                                GRPH_PFLIP_INT_CLEAR);
7674                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7675                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7676                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7677                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7678                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7679                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7680                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7681                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7682         }
7683
7684         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7685                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7686                 tmp |= DC_HPDx_INT_ACK;
7687                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7688         }
7689         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7690                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7691                 tmp |= DC_HPDx_INT_ACK;
7692                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7693         }
7694         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7695                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7696                 tmp |= DC_HPDx_INT_ACK;
7697                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7698         }
7699         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7700                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7701                 tmp |= DC_HPDx_INT_ACK;
7702                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7703         }
7704         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7705                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7706                 tmp |= DC_HPDx_INT_ACK;
7707                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7708         }
7709         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7710                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7711                 tmp |= DC_HPDx_INT_ACK;
7712                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7713         }
7714         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7715                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7716                 tmp |= DC_HPDx_RX_INT_ACK;
7717                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7718         }
7719         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7720                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7721                 tmp |= DC_HPDx_RX_INT_ACK;
7722                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7723         }
7724         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7725                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7726                 tmp |= DC_HPDx_RX_INT_ACK;
7727                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7728         }
7729         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7730                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7731                 tmp |= DC_HPDx_RX_INT_ACK;
7732                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7733         }
7734         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7735                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7736                 tmp |= DC_HPDx_RX_INT_ACK;
7737                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7738         }
7739         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7740                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7741                 tmp |= DC_HPDx_RX_INT_ACK;
7742                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7743         }
7744 }
7745
7746 /**
7747  * cik_irq_disable - disable interrupts
7748  *
7749  * @rdev: radeon_device pointer
7750  *
7751  * Disable interrupts on the hw (CIK).
7752  */
7753 static void cik_irq_disable(struct radeon_device *rdev)
7754 {
7755         cik_disable_interrupts(rdev);
7756         /* Wait and acknowledge irq */
7757         mdelay(1);
7758         cik_irq_ack(rdev);
7759         cik_disable_interrupt_state(rdev);
7760 }
7761
7762 /**
7763  * cik_irq_suspend - disable interrupts for suspend
7764  *
7765  * @rdev: radeon_device pointer
7766  *
7767  * Disable interrupts and stop the RLC (CIK).
7768  * Used for suspend.
7769  */
7770 static void cik_irq_suspend(struct radeon_device *rdev)
7771 {
7772         cik_irq_disable(rdev);
7773         cik_rlc_stop(rdev);
7774 }
7775
7776 /**
7777  * cik_irq_fini - tear down interrupt support
7778  *
7779  * @rdev: radeon_device pointer
7780  *
7781  * Disable interrupts on the hw and free the IH ring
7782  * buffer (CIK).
7783  * Used for driver unload.
7784  */
7785 static void cik_irq_fini(struct radeon_device *rdev)
7786 {
7787         cik_irq_suspend(rdev);
7788         r600_ih_ring_fini(rdev);
7789 }
7790
7791 /**
7792  * cik_get_ih_wptr - get the IH ring buffer wptr
7793  *
7794  * @rdev: radeon_device pointer
7795  *
7796  * Get the IH ring buffer wptr from either the register
7797  * or the writeback memory buffer (CIK).  Also check for
7798  * ring buffer overflow and deal with it.
7799  * Used by cik_irq_process().
7800  * Returns the value of the wptr.
7801  */
7802 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7803 {
7804         u32 wptr, tmp;
7805
7806         if (rdev->wb.enabled)
7807                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7808         else
7809                 wptr = RREG32(IH_RB_WPTR);
7810
7811         if (wptr & RB_OVERFLOW) {
7812                 wptr &= ~RB_OVERFLOW;
7813                 /* When a ring buffer overflow happens, start parsing interrupts
7814                  * from the last vector that was not overwritten (wptr + 16).
7815                  * Hopefully this should allow us to catch up.
7816                  */
7817                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7818                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7819                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7820                 tmp = RREG32(IH_RB_CNTL);
7821                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7822                 WREG32(IH_RB_CNTL, tmp);
7823         }
7824         return (wptr & rdev->ih.ptr_mask);
7825 }
7826
7827 /*        CIK IV Ring
7828  * Each IV ring entry is 128 bits:
7829  * [7:0]    - interrupt source id
7830  * [31:8]   - reserved
7831  * [59:32]  - interrupt source data
7832  * [63:60]  - reserved
7833  * [71:64]  - RINGID
7834  *            CP:
7835  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7836  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7837  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7838  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7839  *            PIPE_ID - ME0 0=3D
7840  *                    - ME1&2 compute dispatcher (4 pipes each)
7841  *            SDMA:
7842  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7843  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7844  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7845  * [79:72]  - VMID
7846  * [95:80]  - PASID
7847  * [127:96] - reserved
7848  */
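/* Illustrative sketch only (not part of the driver): one possible helper for
 * decoding the CP RINGID byte described above.  The bit positions are taken
 * from the layout comment and from the masks used in cik_irq_process() below;
 * the helper name is hypothetical.
 */
static inline void cik_iv_decode_cp_ringid(u32 ring_id, u8 *me_id,
					   u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;    /* ME_ID [1:0]    -> ring_id[6:5] */
	*pipe_id = (ring_id & 0x18) >> 3;  /* PIPE_ID [1:0]  -> ring_id[4:3] */
	*queue_id = (ring_id & 0x7) >> 0;  /* QUEUE_ID [2:0] -> ring_id[2:0] */
}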
7849 /**
7850  * cik_irq_process - interrupt handler
7851  *
7852  * @rdev: radeon_device pointer
7853  *
7854  * Interrupt handler (CIK).  Walk the IH ring,
7855  * ack interrupts and schedule work to handle
7856  * interrupt events.
7857  * Returns irq process return code.
7858  */
7859 int cik_irq_process(struct radeon_device *rdev)
7860 {
7861         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7862         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7863         u32 wptr;
7864         u32 rptr;
7865         u32 src_id, src_data, ring_id;
7866         u8 me_id, pipe_id, queue_id;
7867         u32 ring_index;
7868         bool queue_hotplug = false;
7869         bool queue_dp = false;
7870         bool queue_reset = false;
7871         u32 addr, status, mc_client;
7872         bool queue_thermal = false;
7873
7874         if (!rdev->ih.enabled || rdev->shutdown)
7875                 return IRQ_NONE;
7876
7877         wptr = cik_get_ih_wptr(rdev);
7878
7879 restart_ih:
7880         /* is somebody else already processing irqs? */
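	/* atomic_xchg() returns the previous lock value: non-zero means another
	 * thread is already draining the IH ring, so just return.
	 */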
7881         if (atomic_xchg(&rdev->ih.lock, 1))
7882                 return IRQ_NONE;
7883
7884         rptr = rdev->ih.rptr;
7885         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7886
7887         /* Order reading of wptr vs. reading of IH ring data */
7888         rmb();
7889
7890         /* display interrupts */
7891         cik_irq_ack(rdev);
7892
7893         while (rptr != wptr) {
7894                 /* wptr/rptr are in bytes! */
7895                 ring_index = rptr / 4;
7896
7897                 radeon_kfd_interrupt(rdev,
7898                                 (const void *) &rdev->ih.ring[ring_index]);
7899
7900                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7901                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7902                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7903
7904                 switch (src_id) {
7905                 case 1: /* D1 vblank/vline */
7906                         switch (src_data) {
7907                         case 0: /* D1 vblank */
7908                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7909                                         if (rdev->irq.crtc_vblank_int[0]) {
7910                                                 drm_handle_vblank(rdev->ddev, 0);
7911                                                 rdev->pm.vblank_sync = true;
7912                                                 wake_up(&rdev->irq.vblank_queue);
7913                                         }
7914                                         if (atomic_read(&rdev->irq.pflip[0]))
7915                                                 radeon_crtc_handle_vblank(rdev, 0);
7916                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7917                                         DRM_DEBUG("IH: D1 vblank\n");
7918                                 }
7919                                 break;
7920                         case 1: /* D1 vline */
7921                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7922                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7923                                         DRM_DEBUG("IH: D1 vline\n");
7924                                 }
7925                                 break;
7926                         default:
7927                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7928                                 break;
7929                         }
7930                         break;
7931                 case 2: /* D2 vblank/vline */
7932                         switch (src_data) {
7933                         case 0: /* D2 vblank */
7934                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7935                                         if (rdev->irq.crtc_vblank_int[1]) {
7936                                                 drm_handle_vblank(rdev->ddev, 1);
7937                                                 rdev->pm.vblank_sync = true;
7938                                                 wake_up(&rdev->irq.vblank_queue);
7939                                         }
7940                                         if (atomic_read(&rdev->irq.pflip[1]))
7941                                                 radeon_crtc_handle_vblank(rdev, 1);
7942                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7943                                         DRM_DEBUG("IH: D2 vblank\n");
7944                                 }
7945                                 break;
7946                         case 1: /* D2 vline */
7947                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7948                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7949                                         DRM_DEBUG("IH: D2 vline\n");
7950                                 }
7951                                 break;
7952                         default:
7953                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7954                                 break;
7955                         }
7956                         break;
7957                 case 3: /* D3 vblank/vline */
7958                         switch (src_data) {
7959                         case 0: /* D3 vblank */
7960                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7961                                         if (rdev->irq.crtc_vblank_int[2]) {
7962                                                 drm_handle_vblank(rdev->ddev, 2);
7963                                                 rdev->pm.vblank_sync = true;
7964                                                 wake_up(&rdev->irq.vblank_queue);
7965                                         }
7966                                         if (atomic_read(&rdev->irq.pflip[2]))
7967                                                 radeon_crtc_handle_vblank(rdev, 2);
7968                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7969                                         DRM_DEBUG("IH: D3 vblank\n");
7970                                 }
7971                                 break;
7972                         case 1: /* D3 vline */
7973                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7974                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7975                                         DRM_DEBUG("IH: D3 vline\n");
7976                                 }
7977                                 break;
7978                         default:
7979                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7980                                 break;
7981                         }
7982                         break;
7983                 case 4: /* D4 vblank/vline */
7984                         switch (src_data) {
7985                         case 0: /* D4 vblank */
7986                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7987                                         if (rdev->irq.crtc_vblank_int[3]) {
7988                                                 drm_handle_vblank(rdev->ddev, 3);
7989                                                 rdev->pm.vblank_sync = true;
7990                                                 wake_up(&rdev->irq.vblank_queue);
7991                                         }
7992                                         if (atomic_read(&rdev->irq.pflip[3]))
7993                                                 radeon_crtc_handle_vblank(rdev, 3);
7994                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7995                                         DRM_DEBUG("IH: D4 vblank\n");
7996                                 }
7997                                 break;
7998                         case 1: /* D4 vline */
7999                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
8000                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8001                                         DRM_DEBUG("IH: D4 vline\n");
8002                                 }
8003                                 break;
8004                         default:
8005                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8006                                 break;
8007                         }
8008                         break;
8009                 case 5: /* D5 vblank/vline */
8010                         switch (src_data) {
8011                         case 0: /* D5 vblank */
8012                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
8013                                         if (rdev->irq.crtc_vblank_int[4]) {
8014                                                 drm_handle_vblank(rdev->ddev, 4);
8015                                                 rdev->pm.vblank_sync = true;
8016                                                 wake_up(&rdev->irq.vblank_queue);
8017                                         }
8018                                         if (atomic_read(&rdev->irq.pflip[4]))
8019                                                 radeon_crtc_handle_vblank(rdev, 4);
8020                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8021                                         DRM_DEBUG("IH: D5 vblank\n");
8022                                 }
8023                                 break;
8024                         case 1: /* D5 vline */
8025                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
8026                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8027                                         DRM_DEBUG("IH: D5 vline\n");
8028                                 }
8029                                 break;
8030                         default:
8031                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8032                                 break;
8033                         }
8034                         break;
8035                 case 6: /* D6 vblank/vline */
8036                         switch (src_data) {
8037                         case 0: /* D6 vblank */
8038                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8039                                         if (rdev->irq.crtc_vblank_int[5]) {
8040                                                 drm_handle_vblank(rdev->ddev, 5);
8041                                                 rdev->pm.vblank_sync = true;
8042                                                 wake_up(&rdev->irq.vblank_queue);
8043                                         }
8044                                         if (atomic_read(&rdev->irq.pflip[5]))
8045                                                 radeon_crtc_handle_vblank(rdev, 5);
8046                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8047                                         DRM_DEBUG("IH: D6 vblank\n");
8048                                 }
8049                                 break;
8050                         case 1: /* D6 vline */
8051                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8052                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8053                                         DRM_DEBUG("IH: D6 vline\n");
8054                                 }
8055                                 break;
8056                         default:
8057                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8058                                 break;
8059                         }
8060                         break;
8061                 case 8: /* D1 page flip */
8062                 case 10: /* D2 page flip */
8063                 case 12: /* D3 page flip */
8064                 case 14: /* D4 page flip */
8065                 case 16: /* D5 page flip */
8066                 case 18: /* D6 page flip */
8067                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8068                         if (radeon_use_pflipirq > 0)
8069                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8070                         break;
8071                 case 42: /* HPD hotplug */
8072                         switch (src_data) {
8073                         case 0:
8074                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8075                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8076                                         queue_hotplug = true;
8077                                         DRM_DEBUG("IH: HPD1\n");
8078                                 }
8079                                 break;
8080                         case 1:
8081                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8082                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8083                                         queue_hotplug = true;
8084                                         DRM_DEBUG("IH: HPD2\n");
8085                                 }
8086                                 break;
8087                         case 2:
8088                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8089                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8090                                         queue_hotplug = true;
8091                                         DRM_DEBUG("IH: HPD3\n");
8092                                 }
8093                                 break;
8094                         case 3:
8095                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8096                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8097                                         queue_hotplug = true;
8098                                         DRM_DEBUG("IH: HPD4\n");
8099                                 }
8100                                 break;
8101                         case 4:
8102                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8103                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8104                                         queue_hotplug = true;
8105                                         DRM_DEBUG("IH: HPD5\n");
8106                                 }
8107                                 break;
8108                         case 5:
8109                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8110                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8111                                         queue_hotplug = true;
8112                                         DRM_DEBUG("IH: HPD6\n");
8113                                 }
8114                                 break;
8115                         case 6:
8116                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
8117                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8118                                         queue_dp = true;
8119                                         DRM_DEBUG("IH: HPD_RX 1\n");
8120                                 }
8121                                 break;
8122                         case 7:
8123                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
8124                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8125                                         queue_dp = true;
8126                                         DRM_DEBUG("IH: HPD_RX 2\n");
8127                                 }
8128                                 break;
8129                         case 8:
8130                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
8131                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8132                                         queue_dp = true;
8133                                         DRM_DEBUG("IH: HPD_RX 3\n");
8134                                 }
8135                                 break;
8136                         case 9:
8137                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
8138                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8139                                         queue_dp = true;
8140                                         DRM_DEBUG("IH: HPD_RX 4\n");
8141                                 }
8142                                 break;
8143                         case 10:
8144                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
8145                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8146                                         queue_dp = true;
8147                                         DRM_DEBUG("IH: HPD_RX 5\n");
8148                                 }
8149                                 break;
8150                         case 11:
8151                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
8152                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8153                                         queue_dp = true;
8154                                         DRM_DEBUG("IH: HPD_RX 6\n");
8155                                 }
8156                                 break;
8157                         default:
8158                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8159                                 break;
8160                         }
8161                         break;
8162                 case 96:
8163                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8164                         WREG32(SRBM_INT_ACK, 0x1);
8165                         break;
8166                 case 124: /* UVD */
8167                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8168                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8169                         break;
8170                 case 146:
8171                 case 147:
8172                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8173                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8174                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8175                         /* reset addr and status */
8176                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8177                         if (addr == 0x0 && status == 0x0)
8178                                 break;
8179                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8180                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8181                                 addr);
8182                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8183                                 status);
8184                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8185                         break;
8186                 case 167: /* VCE */
8187                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8188                         switch (src_data) {
8189                         case 0:
8190                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8191                                 break;
8192                         case 1:
8193                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8194                                 break;
8195                         default:
8196                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8197                                 break;
8198                         }
8199                         break;
8200                 case 176: /* GFX RB CP_INT */
8201                 case 177: /* GFX IB CP_INT */
8202                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8203                         break;
8204                 case 181: /* CP EOP event */
8205                         DRM_DEBUG("IH: CP EOP\n");
8206                         /* XXX check the bitfield order! */
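			/* RINGID layout for CP (see the IV ring description above):
			 * ring_id[2:0] = QUEUE_ID, ring_id[4:3] = PIPE_ID,
			 * ring_id[6:5] = ME_ID
			 */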
8207                         me_id = (ring_id & 0x60) >> 5;
8208                         pipe_id = (ring_id & 0x18) >> 3;
8209                         queue_id = (ring_id & 0x7) >> 0;
8210                         switch (me_id) {
8211                         case 0:
8212                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8213                                 break;
8214                         case 1:
8215                         case 2:
8216                                 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8217                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8218                                 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8219                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8220                                 break;
8221                         }
8222                         break;
8223                 case 184: /* CP Privileged reg access */
8224                         DRM_ERROR("Illegal register access in command stream\n");
8225                         /* XXX check the bitfield order! */
8226                         me_id = (ring_id & 0x60) >> 5;
8227                         pipe_id = (ring_id & 0x18) >> 3;
8228                         queue_id = (ring_id & 0x7) >> 0;
8229                         switch (me_id) {
8230                         case 0:
8231                                 /* This results in a full GPU reset, but all we need to do is soft
8232                                  * reset the CP for gfx
8233                                  */
8234                                 queue_reset = true;
8235                                 break;
8236                         case 1:
8237                                 /* XXX compute */
8238                                 queue_reset = true;
8239                                 break;
8240                         case 2:
8241                                 /* XXX compute */
8242                                 queue_reset = true;
8243                                 break;
8244                         }
8245                         break;
8246                 case 185: /* CP Privileged inst */
8247                         DRM_ERROR("Illegal instruction in command stream\n");
8248                         /* XXX check the bitfield order! */
8249                         me_id = (ring_id & 0x60) >> 5;
8250                         pipe_id = (ring_id & 0x18) >> 3;
8251                         queue_id = (ring_id & 0x7) >> 0;
8252                         switch (me_id) {
8253                         case 0:
8254                                 /* This results in a full GPU reset, but all we need to do is soft
8255                                  * reset the CP for gfx
8256                                  */
8257                                 queue_reset = true;
8258                                 break;
8259                         case 1:
8260                                 /* XXX compute */
8261                                 queue_reset = true;
8262                                 break;
8263                         case 2:
8264                                 /* XXX compute */
8265                                 queue_reset = true;
8266                                 break;
8267                         }
8268                         break;
8269                 case 224: /* SDMA trap event */
8270                         /* XXX check the bitfield order! */
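			/* RINGID layout for SDMA (see the IV ring description above):
			 * ring_id[1:0] = INSTANCE_ID (0 = sdma0, 1 = sdma1),
			 * ring_id[3:2] = QUEUE_ID
			 */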
8271                         me_id = (ring_id & 0x3) >> 0;
8272                         queue_id = (ring_id & 0xc) >> 2;
8273                         DRM_DEBUG("IH: SDMA trap\n");
8274                         switch (me_id) {
8275                         case 0:
8276                                 switch (queue_id) {
8277                                 case 0:
8278                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8279                                         break;
8280                                 case 1:
8281                                         /* XXX compute */
8282                                         break;
8283                                 case 2:
8284                                         /* XXX compute */
8285                                         break;
8286                                 }
8287                                 break;
8288                         case 1:
8289                                 switch (queue_id) {
8290                                 case 0:
8291                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8292                                         break;
8293                                 case 1:
8294                                         /* XXX compute */
8295                                         break;
8296                                 case 2:
8297                                         /* XXX compute */
8298                                         break;
8299                                 }
8300                                 break;
8301                         }
8302                         break;
8303                 case 230: /* thermal low to high */
8304                         DRM_DEBUG("IH: thermal low to high\n");
8305                         rdev->pm.dpm.thermal.high_to_low = false;
8306                         queue_thermal = true;
8307                         break;
8308                 case 231: /* thermal high to low */
8309                         DRM_DEBUG("IH: thermal high to low\n");
8310                         rdev->pm.dpm.thermal.high_to_low = true;
8311                         queue_thermal = true;
8312                         break;
8313                 case 233: /* GUI IDLE */
8314                         DRM_DEBUG("IH: GUI idle\n");
8315                         break;
8316                 case 241: /* SDMA Privileged inst */
8317                 case 247: /* SDMA Privileged inst */
8318                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8319                         /* XXX check the bitfield order! */
8320                         me_id = (ring_id & 0x3) >> 0;
8321                         queue_id = (ring_id & 0xc) >> 2;
8322                         switch (me_id) {
8323                         case 0:
8324                                 switch (queue_id) {
8325                                 case 0:
8326                                         queue_reset = true;
8327                                         break;
8328                                 case 1:
8329                                         /* XXX compute */
8330                                         queue_reset = true;
8331                                         break;
8332                                 case 2:
8333                                         /* XXX compute */
8334                                         queue_reset = true;
8335                                         break;
8336                                 }
8337                                 break;
8338                         case 1:
8339                                 switch (queue_id) {
8340                                 case 0:
8341                                         queue_reset = true;
8342                                         break;
8343                                 case 1:
8344                                         /* XXX compute */
8345                                         queue_reset = true;
8346                                         break;
8347                                 case 2:
8348                                         /* XXX compute */
8349                                         queue_reset = true;
8350                                         break;
8351                                 }
8352                                 break;
8353                         }
8354                         break;
8355                 default:
8356                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8357                         break;
8358                 }
8359
8360                 /* wptr/rptr are in bytes! */
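		/* each IV ring entry is 128 bits (16 bytes), so advance by 16 */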
8361                 rptr += 16;
8362                 rptr &= rdev->ih.ptr_mask;
8363                 WREG32(IH_RB_RPTR, rptr);
8364         }
8365         if (queue_dp)
8366                 schedule_work(&rdev->dp_work);
8367         if (queue_hotplug)
8368                 schedule_work(&rdev->hotplug_work);
8369         if (queue_reset) {
8370                 rdev->needs_reset = true;
8371                 wake_up_all(&rdev->fence_queue);
8372         }
8373         if (queue_thermal)
8374                 schedule_work(&rdev->pm.dpm.thermal.work);
8375         rdev->ih.rptr = rptr;
8376         atomic_set(&rdev->ih.lock, 0);
8377
8378         /* make sure wptr hasn't changed while processing */
8379         wptr = cik_get_ih_wptr(rdev);
8380         if (wptr != rptr)
8381                 goto restart_ih;
8382
8383         return IRQ_HANDLED;
8384 }
8385
8386 /*
8387  * startup/shutdown callbacks
8388  */
8389 /**
8390  * cik_startup - program the asic to a functional state
8391  *
8392  * @rdev: radeon_device pointer
8393  *
8394  * Programs the asic to a functional state (CIK).
8395  * Called by cik_init() and cik_resume().
8396  * Returns 0 for success, error for failure.
8397  */
8398 static int cik_startup(struct radeon_device *rdev)
8399 {
8400         struct radeon_ring *ring;
8401         u32 nop;
8402         int r;
8403
8404         /* enable pcie gen2/3 link */
8405         cik_pcie_gen3_enable(rdev);
8406         /* enable aspm */
8407         cik_program_aspm(rdev);
8408
8409         /* scratch needs to be initialized before MC */
8410         r = r600_vram_scratch_init(rdev);
8411         if (r)
8412                 return r;
8413
8414         cik_mc_program(rdev);
8415
8416         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8417                 r = ci_mc_load_microcode(rdev);
8418                 if (r) {
8419                         DRM_ERROR("Failed to load MC firmware!\n");
8420                         return r;
8421                 }
8422         }
8423
8424         r = cik_pcie_gart_enable(rdev);
8425         if (r)
8426                 return r;
8427         cik_gpu_init(rdev);
8428
8429         /* allocate rlc buffers */
8430         if (rdev->flags & RADEON_IS_IGP) {
8431                 if (rdev->family == CHIP_KAVERI) {
8432                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8433                         rdev->rlc.reg_list_size =
8434                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8435                 } else {
8436                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8437                         rdev->rlc.reg_list_size =
8438                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8439                 }
8440         }
8441         rdev->rlc.cs_data = ci_cs_data;
8442         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8443         r = sumo_rlc_init(rdev);
8444         if (r) {
8445                 DRM_ERROR("Failed to init rlc BOs!\n");
8446                 return r;
8447         }
8448
8449         /* allocate wb buffer */
8450         r = radeon_wb_init(rdev);
8451         if (r)
8452                 return r;
8453
8454         /* allocate mec buffers */
8455         r = cik_mec_init(rdev);
8456         if (r) {
8457                 DRM_ERROR("Failed to init MEC BOs!\n");
8458                 return r;
8459         }
8460
8461         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8462         if (r) {
8463                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8464                 return r;
8465         }
8466
8467         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8468         if (r) {
8469                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8470                 return r;
8471         }
8472
8473         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8474         if (r) {
8475                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8476                 return r;
8477         }
8478
8479         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8480         if (r) {
8481                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8482                 return r;
8483         }
8484
8485         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8486         if (r) {
8487                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8488                 return r;
8489         }
8490
8491         r = radeon_uvd_resume(rdev);
8492         if (!r) {
8493                 r = uvd_v4_2_resume(rdev);
8494                 if (!r) {
8495                         r = radeon_fence_driver_start_ring(rdev,
8496                                                            R600_RING_TYPE_UVD_INDEX);
8497                         if (r)
8498                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8499                 }
8500         }
8501         if (r)
8502                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8503
8504         r = radeon_vce_resume(rdev);
8505         if (!r) {
8506                 r = vce_v2_0_resume(rdev);
8507                 if (!r)
8508                         r = radeon_fence_driver_start_ring(rdev,
8509                                                            TN_RING_TYPE_VCE1_INDEX);
8510                 if (!r)
8511                         r = radeon_fence_driver_start_ring(rdev,
8512                                                            TN_RING_TYPE_VCE2_INDEX);
8513         }
8514         if (r) {
8515                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8516                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8517                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8518         }
8519
8520         /* Enable IRQ */
8521         if (!rdev->irq.installed) {
8522                 r = radeon_irq_kms_init(rdev);
8523                 if (r)
8524                         return r;
8525         }
8526
8527         r = cik_irq_init(rdev);
8528         if (r) {
8529                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8530                 radeon_irq_kms_fini(rdev);
8531                 return r;
8532         }
8533         cik_irq_set(rdev);
8534
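	/* Pick the NOP packet used to pad the rings.  Older Hawaii CP
	 * microcode (no rdev->new_fw) presumably does not handle the type-3
	 * NOP correctly, hence the type-2 fallback there; this rationale is
	 * an assumption, the check itself is as written below.
	 */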
8535         if (rdev->family == CHIP_HAWAII) {
8536                 if (rdev->new_fw)
8537                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8538                 else
8539                         nop = RADEON_CP_PACKET2;
8540         } else {
8541                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8542         }
8543
8544         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8545         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8546                              nop);
8547         if (r)
8548                 return r;
8549
8550         /* set up the compute queues */
8551         /* type-2 packets are deprecated on MEC, use type-3 instead */
8552         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8553         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8554                              nop);
8555         if (r)
8556                 return r;
8557         ring->me = 1; /* first MEC */
8558         ring->pipe = 0; /* first pipe */
8559         ring->queue = 0; /* first queue */
8560         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8561
8562         /* type-2 packets are deprecated on MEC, use type-3 instead */
8563         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8564         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8565                              nop);
8566         if (r)
8567                 return r;
8568         /* dGPUs only have 1 MEC */
8569         ring->me = 1; /* first MEC */
8570         ring->pipe = 0; /* first pipe */
8571         ring->queue = 1; /* second queue */
8572         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8573
8574         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8575         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8576                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8577         if (r)
8578                 return r;
8579
8580         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8581         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8582                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8583         if (r)
8584                 return r;
8585
8586         r = cik_cp_resume(rdev);
8587         if (r)
8588                 return r;
8589
8590         r = cik_sdma_resume(rdev);
8591         if (r)
8592                 return r;
8593
8594         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8595         if (ring->ring_size) {
8596                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8597                                      RADEON_CP_PACKET2);
8598                 if (!r)
8599                         r = uvd_v1_0_init(rdev);
8600                 if (r)
8601                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8602         }
8603
8604         r = -ENOENT;
8605
8606         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8607         if (ring->ring_size)
8608                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8609                                      VCE_CMD_NO_OP);
8610
8611         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8612         if (ring->ring_size)
8613                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8614                                      VCE_CMD_NO_OP);
8615
8616         if (!r)
8617                 r = vce_v1_0_init(rdev);
8618         else if (r != -ENOENT)
8619                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8620
8621         r = radeon_ib_pool_init(rdev);
8622         if (r) {
8623                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8624                 return r;
8625         }
8626
8627         r = radeon_vm_manager_init(rdev);
8628         if (r) {
8629                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8630                 return r;
8631         }
8632
8633         r = radeon_audio_init(rdev);
8634         if (r)
8635                 return r;
8636
8637         r = radeon_kfd_resume(rdev);
8638         if (r)
8639                 return r;
8640
8641         return 0;
8642 }
8643
8644 /**
8645  * cik_resume - resume the asic to a functional state
8646  *
8647  * @rdev: radeon_device pointer
8648  *
8649  * Programs the asic to a functional state (CIK).
8650  * Called at resume.
8651  * Returns 0 for success, error for failure.
8652  */
8653 int cik_resume(struct radeon_device *rdev)
8654 {
8655         int r;
8656
8657         /* post card */
8658         atom_asic_init(rdev->mode_info.atom_context);
8659
8660         /* init golden registers */
8661         cik_init_golden_registers(rdev);
8662
8663         if (rdev->pm.pm_method == PM_METHOD_DPM)
8664                 radeon_pm_resume(rdev);
8665
8666         rdev->accel_working = true;
8667         r = cik_startup(rdev);
8668         if (r) {
8669                 DRM_ERROR("cik startup failed on resume\n");
8670                 rdev->accel_working = false;
8671                 return r;
8672         }
8673
8674         return r;
8675
8676 }
8677
8678 /**
8679  * cik_suspend - suspend the asic
8680  *
8681  * @rdev: radeon_device pointer
8682  *
8683  * Bring the chip into a state suitable for suspend (CIK).
8684  * Called at suspend.
8685  * Returns 0 for success.
8686  */
8687 int cik_suspend(struct radeon_device *rdev)
8688 {
8689         radeon_kfd_suspend(rdev);
8690         radeon_pm_suspend(rdev);
8691         radeon_audio_fini(rdev);
8692         radeon_vm_manager_fini(rdev);
8693         cik_cp_enable(rdev, false);
8694         cik_sdma_enable(rdev, false);
8695         uvd_v1_0_fini(rdev);
8696         radeon_uvd_suspend(rdev);
8697         radeon_vce_suspend(rdev);
8698         cik_fini_pg(rdev);
8699         cik_fini_cg(rdev);
8700         cik_irq_suspend(rdev);
8701         radeon_wb_disable(rdev);
8702         cik_pcie_gart_disable(rdev);
8703         return 0;
8704 }
8705
8706 /* The plan is to move initialization into this function and use
8707  * helper functions so that radeon_device_init does pretty much
8708  * nothing more than call the asic specific functions. This
8709  * should also allow us to remove a bunch of callback functions
8710  * like vram_info.
8711  */
8712 /**
8713  * cik_init - asic specific driver and hw init
8714  *
8715  * @rdev: radeon_device pointer
8716  *
8717  * Setup asic specific driver variables and program the hw
8718  * to a functional state (CIK).
8719  * Called at driver startup.
8720  * Returns 0 for success, errors for failure.
8721  */
8722 int cik_init(struct radeon_device *rdev)
8723 {
8724         struct radeon_ring *ring;
8725         int r;
8726
8727         /* Read BIOS */
8728         if (!radeon_get_bios(rdev)) {
8729                 if (ASIC_IS_AVIVO(rdev))
8730                         return -EINVAL;
8731         }
8732         /* Must be an ATOMBIOS */
8733         if (!rdev->is_atom_bios) {
8734                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8735                 return -EINVAL;
8736         }
8737         r = radeon_atombios_init(rdev);
8738         if (r)
8739                 return r;
8740
8741         /* Post card if necessary */
8742         if (!radeon_card_posted(rdev)) {
8743                 if (!rdev->bios) {
8744                         dev_err(rdev->dev, "Card not posted and no BIOS - fatal error\n");
8745                         return -EINVAL;
8746                 }
8747                 DRM_INFO("GPU not posted. posting now...\n");
8748                 atom_asic_init(rdev->mode_info.atom_context);
8749         }
8750         /* init golden registers */
8751         cik_init_golden_registers(rdev);
8752         /* Initialize scratch registers */
8753         cik_scratch_init(rdev);
8754         /* Initialize surface registers */
8755         radeon_surface_init(rdev);
8756         /* Initialize clocks */
8757         radeon_get_clock_info(rdev->ddev);
8758
8759         /* Fence driver */
8760         r = radeon_fence_driver_init(rdev);
8761         if (r)
8762                 return r;
8763
8764         /* initialize memory controller */
8765         r = cik_mc_init(rdev);
8766         if (r)
8767                 return r;
8768         /* Memory manager */
8769         r = radeon_bo_init(rdev);
8770         if (r)
8771                 return r;
8772
8773         if (rdev->flags & RADEON_IS_IGP) {
8774                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8775                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8776                         r = cik_init_microcode(rdev);
8777                         if (r) {
8778                                 DRM_ERROR("Failed to load firmware!\n");
8779                                 return r;
8780                         }
8781                 }
8782         } else {
8783                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8784                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8785                     !rdev->mc_fw) {
8786                         r = cik_init_microcode(rdev);
8787                         if (r) {
8788                                 DRM_ERROR("Failed to load firmware!\n");
8789                                 return r;
8790                         }
8791                 }
8792         }
8793
8794         /* Initialize power management */
8795         radeon_pm_init(rdev);
8796
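             /* Set up the software ring state: 1MB for the GFX ring and the
              * two compute CP rings (the compute rings also need doorbells),
              * 256KB for each SDMA ring.  The actual ring buffers are
              * allocated later by radeon_ring_init() in cik_startup().
              */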
8797         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8798         ring->ring_obj = NULL;
8799         r600_ring_init(rdev, ring, 1024 * 1024);
8800
8801         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8802         ring->ring_obj = NULL;
8803         r600_ring_init(rdev, ring, 1024 * 1024);
8804         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8805         if (r)
8806                 return r;
8807
8808         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8809         ring->ring_obj = NULL;
8810         r600_ring_init(rdev, ring, 1024 * 1024);
8811         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8812         if (r)
8813                 return r;
8814
8815         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8816         ring->ring_obj = NULL;
8817         r600_ring_init(rdev, ring, 256 * 1024);
8818
8819         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8820         ring->ring_obj = NULL;
8821         r600_ring_init(rdev, ring, 256 * 1024);
8822
8823         r = radeon_uvd_init(rdev);
8824         if (!r) {
8825                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8826                 ring->ring_obj = NULL;
8827                 r600_ring_init(rdev, ring, 4096);
8828         }
8829
8830         r = radeon_vce_init(rdev);
8831         if (!r) {
8832                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8833                 ring->ring_obj = NULL;
8834                 r600_ring_init(rdev, ring, 4096);
8835
8836                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8837                 ring->ring_obj = NULL;
8838                 r600_ring_init(rdev, ring, 4096);
8839         }
8840
8841         rdev->ih.ring_obj = NULL;
8842         r600_ih_ring_init(rdev, 64 * 1024);
8843
8844         r = r600_pcie_gart_init(rdev);
8845         if (r)
8846                 return r;
8847
8848         rdev->accel_working = true;
8849         r = cik_startup(rdev);
8850         if (r) {
8851                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8852                 cik_cp_fini(rdev);
8853                 cik_sdma_fini(rdev);
8854                 cik_irq_fini(rdev);
8855                 sumo_rlc_fini(rdev);
8856                 cik_mec_fini(rdev);
8857                 radeon_wb_fini(rdev);
8858                 radeon_ib_pool_fini(rdev);
8859                 radeon_vm_manager_fini(rdev);
8860                 radeon_irq_kms_fini(rdev);
8861                 cik_pcie_gart_fini(rdev);
8862                 rdev->accel_working = false;
8863         }
8864
8865         /* Don't start up if the MC ucode is missing.
8866          * The default clocks and voltages before the MC ucode
8867          * is loaded are not sufficient for advanced operations.
8868          */
8869         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8870                 DRM_ERROR("radeon: MC ucode required for CIK.\n");
8871                 return -EINVAL;
8872         }
8873
8874         return 0;
8875 }
8876
8877 /**
8878  * cik_fini - asic specific driver and hw fini
8879  *
8880  * @rdev: radeon_device pointer
8881  *
8882  * Tear down the asic specific driver variables and program the hw
8883  * to an idle state (CIK).
8884  * Called at driver unload.
8885  */
8886 void cik_fini(struct radeon_device *rdev)
8887 {
8888         radeon_pm_fini(rdev);
8889         cik_cp_fini(rdev);
8890         cik_sdma_fini(rdev);
8891         cik_fini_pg(rdev);
8892         cik_fini_cg(rdev);
8893         cik_irq_fini(rdev);
8894         sumo_rlc_fini(rdev);
8895         cik_mec_fini(rdev);
8896         radeon_wb_fini(rdev);
8897         radeon_vm_manager_fini(rdev);
8898         radeon_ib_pool_fini(rdev);
8899         radeon_irq_kms_fini(rdev);
8900         uvd_v1_0_fini(rdev);
8901         radeon_uvd_fini(rdev);
8902         radeon_vce_fini(rdev);
8903         cik_pcie_gart_fini(rdev);
8904         r600_vram_scratch_fini(rdev);
8905         radeon_gem_fini(rdev);
8906         radeon_fence_driver_fini(rdev);
8907         radeon_bo_fini(rdev);
8908         radeon_atombios_fini(rdev);
8909         kfree(rdev->bios);
8910         rdev->bios = NULL;
8911 }
8912
8913 void dce8_program_fmt(struct drm_encoder *encoder)
8914 {
8915         struct drm_device *dev = encoder->dev;
8916         struct radeon_device *rdev = dev->dev_private;
8917         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8918         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8919         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8920         int bpc = 0;
8921         u32 tmp = 0;
8922         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8923
8924         if (connector) {
8925                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8926                 bpc = radeon_get_monitor_bpc(connector);
8927                 dither = radeon_connector->dither;
8928         }
8929
8930         /* LVDS/eDP FMT is set up by atom */
8931         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8932                 return;
8933
8934         /* not needed for analog */
8935         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8936             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8937                 return;
8938
8939         if (bpc == 0)
8940                 return;
8941
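             /* Build the FMT_BIT_DEPTH_CONTROL value: either spatial/random
              * dithering or plain truncation down to the bpc reported by the
              * sink.
              */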
8942         switch (bpc) {
8943         case 6:
8944                 if (dither == RADEON_FMT_DITHER_ENABLE)
8945                         /* XXX sort out optimal dither settings */
8946                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8947                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8948                 else
8949                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8950                 break;
8951         case 8:
8952                 if (dither == RADEON_FMT_DITHER_ENABLE)
8953                         /* XXX sort out optimal dither settings */
8954                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8955                                 FMT_RGB_RANDOM_ENABLE |
8956                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8957                 else
8958                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8959                 break;
8960         case 10:
8961                 if (dither == RADEON_FMT_DITHER_ENABLE)
8962                         /* XXX sort out optimal dither settings */
8963                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8964                                 FMT_RGB_RANDOM_ENABLE |
8965                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8966                 else
8967                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8968                 break;
8969         default:
8970                 /* not needed */
8971                 break;
8972         }
8973
8974         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8975 }
8976
8977 /* display watermark setup */
8978 /**
8979  * dce8_line_buffer_adjust - Set up the line buffer
8980  *
8981  * @rdev: radeon_device pointer
8982  * @radeon_crtc: the selected display controller
8983  * @mode: the current display mode on the selected display
8984  * controller
8985  *
8986  * Set up the line buffer allocation for
8987  * the selected display controller (CIK).
8988  * Returns the line buffer size in pixels.
8989  */
8990 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8991                                    struct radeon_crtc *radeon_crtc,
8992                                    struct drm_display_mode *mode)
8993 {
8994         u32 tmp, buffer_alloc, i;
8995         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8996         /*
8997          * Line Buffer Setup
8998          * There are 6 line buffers, one for each display controller.
8999          * There are 3 partitions per LB. Select the number of partitions
9000          * to enable based on the display width.  For display widths larger
9001          * than 4096, you need to use 2 display controllers and combine
9002          * them using the stereo blender.
9003          */
9004         if (radeon_crtc->base.enabled && mode) {
9005                 if (mode->crtc_hdisplay < 1920) {
9006                         tmp = 1;
9007                         buffer_alloc = 2;
9008                 } else if (mode->crtc_hdisplay < 2560) {
9009                         tmp = 2;
9010                         buffer_alloc = 2;
9011                 } else if (mode->crtc_hdisplay < 4096) {
9012                         tmp = 0;
9013                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9014                 } else {
9015                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9016                         tmp = 0;
9017                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9018                 }
9019         } else {
9020                 tmp = 1;
9021                 buffer_alloc = 0;
9022         }
9023
9024         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9025                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9026
9027         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9028                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
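             /* wait for the hardware to acknowledge the new DMIF buffer allocation */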
9029         for (i = 0; i < rdev->usec_timeout; i++) {
9030                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9031                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9032                         break;
9033                 udelay(1);
9034         }
9035
9036         if (radeon_crtc->base.enabled && mode) {
9037                 switch (tmp) {
9038                 case 0:
9039                 default:
9040                         return 4096 * 2;
9041                 case 1:
9042                         return 1920 * 2;
9043                 case 2:
9044                         return 2560 * 2;
9045                 }
9046         }
9047
9048         /* controller not enabled, so no lb used */
9049         return 0;
9050 }
9051
9052 /**
9053  * cik_get_number_of_dram_channels - get the number of dram channels
9054  *
9055  * @rdev: radeon_device pointer
9056  *
9057  * Look up the number of video ram channels (CIK).
9058  * Used for display watermark bandwidth calculations
9059  * Returns the number of dram channels
9060  */
9061 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9062 {
9063         u32 tmp = RREG32(MC_SHARED_CHMAP);
9064
9065         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9066         case 0:
9067         default:
9068                 return 1;
9069         case 1:
9070                 return 2;
9071         case 2:
9072                 return 4;
9073         case 3:
9074                 return 8;
9075         case 4:
9076                 return 3;
9077         case 5:
9078                 return 6;
9079         case 6:
9080                 return 10;
9081         case 7:
9082                 return 12;
9083         case 8:
9084                 return 16;
9085         }
9086 }
9087
9088 struct dce8_wm_params {
9089         u32 dram_channels; /* number of dram channels */
9090         u32 yclk;          /* bandwidth per dram data pin in kHz */
9091         u32 sclk;          /* engine clock in kHz */
9092         u32 disp_clk;      /* display clock in kHz */
9093         u32 src_width;     /* viewport width */
9094         u32 active_time;   /* active display time in ns */
9095         u32 blank_time;    /* blank time in ns */
9096         bool interlaced;    /* mode is interlaced */
9097         fixed20_12 vsc;    /* vertical scale ratio */
9098         u32 num_heads;     /* number of active crtcs */
9099         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9100         u32 lb_size;       /* line buffer allocated to pipe */
9101         u32 vtaps;         /* vertical scaler taps */
9102 };
9103
9104 /**
9105  * dce8_dram_bandwidth - get the dram bandwidth
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the raw dram bandwidth (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the dram bandwidth in MBytes/s
9112  */
9113 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9114 {
9115         /* Calculate raw DRAM Bandwidth */
9116         fixed20_12 dram_efficiency; /* 0.7 */
9117         fixed20_12 yclk, dram_channels, bandwidth;
9118         fixed20_12 a;
9119
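             /* bandwidth (MB/s) is roughly yclk/1000 (MHz) * (dram_channels *
              * 4 bytes) * 0.7 DRAM efficiency, computed in 20.12 fixed point
              * below.
              */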
9120         a.full = dfixed_const(1000);
9121         yclk.full = dfixed_const(wm->yclk);
9122         yclk.full = dfixed_div(yclk, a);
9123         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9124         a.full = dfixed_const(10);
9125         dram_efficiency.full = dfixed_const(7);
9126         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9127         bandwidth.full = dfixed_mul(dram_channels, yclk);
9128         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9129
9130         return dfixed_trunc(bandwidth);
9131 }
9132
9133 /**
9134  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9135  *
9136  * @wm: watermark calculation data
9137  *
9138  * Calculate the dram bandwidth used for display (CIK).
9139  * Used for display watermark bandwidth calculations
9140  * Returns the dram bandwidth for display in MBytes/s
9141  */
9142 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9143 {
9144         /* Calculate DRAM Bandwidth and the part allocated to display. */
9145         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9146         fixed20_12 yclk, dram_channels, bandwidth;
9147         fixed20_12 a;
9148
9149         a.full = dfixed_const(1000);
9150         yclk.full = dfixed_const(wm->yclk);
9151         yclk.full = dfixed_div(yclk, a);
9152         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9153         a.full = dfixed_const(10);
9154         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9155         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9156         bandwidth.full = dfixed_mul(dram_channels, yclk);
9157         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9158
9159         return dfixed_trunc(bandwidth);
9160 }
9161
9162 /**
9163  * dce8_data_return_bandwidth - get the data return bandwidth
9164  *
9165  * @wm: watermark calculation data
9166  *
9167  * Calculate the data return bandwidth used for display (CIK).
9168  * Used for display watermark bandwidth calculations
9169  * Returns the data return bandwidth in MBytes/s
9170  */
9171 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9172 {
9173         /* Calculate the display Data return Bandwidth */
9174         fixed20_12 return_efficiency; /* 0.8 */
9175         fixed20_12 sclk, bandwidth;
9176         fixed20_12 a;
9177
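             /* roughly 32 bytes per SCLK cycle at 0.8 return efficiency,
              * with sclk converted from kHz to MHz first.
              */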
9178         a.full = dfixed_const(1000);
9179         sclk.full = dfixed_const(wm->sclk);
9180         sclk.full = dfixed_div(sclk, a);
9181         a.full = dfixed_const(10);
9182         return_efficiency.full = dfixed_const(8);
9183         return_efficiency.full = dfixed_div(return_efficiency, a);
9184         a.full = dfixed_const(32);
9185         bandwidth.full = dfixed_mul(a, sclk);
9186         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9187
9188         return dfixed_trunc(bandwidth);
9189 }
9190
9191 /**
9192  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9193  *
9194  * @wm: watermark calculation data
9195  *
9196  * Calculate the dmif bandwidth used for display (CIK).
9197  * Used for display watermark bandwidth calculations
9198  * Returns the dmif bandwidth in MBytes/s
9199  */
9200 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9201 {
9202         /* Calculate the DMIF Request Bandwidth */
9203         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9204         fixed20_12 disp_clk, bandwidth;
9205         fixed20_12 a, b;
9206
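             /* roughly 32 bytes per display clock cycle at 0.8 request
              * efficiency, with disp_clk converted from kHz to MHz first.
              */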
9207         a.full = dfixed_const(1000);
9208         disp_clk.full = dfixed_const(wm->disp_clk);
9209         disp_clk.full = dfixed_div(disp_clk, a);
9210         a.full = dfixed_const(32);
9211         b.full = dfixed_mul(a, disp_clk);
9212
9213         a.full = dfixed_const(10);
9214         disp_clk_request_efficiency.full = dfixed_const(8);
9215         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9216
9217         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9218
9219         return dfixed_trunc(bandwidth);
9220 }
9221
9222 /**
9223  * dce8_available_bandwidth - get the min available bandwidth
9224  *
9225  * @wm: watermark calculation data
9226  *
9227  * Calculate the min available bandwidth used for display (CIK).
9228  * Used for display watermark bandwidth calculations
9229  * Returns the min available bandwidth in MBytes/s
9230  */
9231 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9232 {
9233         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9234         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9235         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9236         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9237
9238         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9239 }
9240
9241 /**
9242  * dce8_average_bandwidth - get the average available bandwidth
9243  *
9244  * @wm: watermark calculation data
9245  *
9246  * Calculate the average available bandwidth used for display (CIK).
9247  * Used for display watermark bandwidth calculations
9248  * Returns the average available bandwidth in MBytes/s
9249  */
9250 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9251 {
9252         /* Calculate the display mode Average Bandwidth
9253          * DisplayMode should contain the source and destination dimensions,
9254          * timing, etc.
9255          */
9256         fixed20_12 bpp;
9257         fixed20_12 line_time;
9258         fixed20_12 src_width;
9259         fixed20_12 bandwidth;
9260         fixed20_12 a;
9261
9262         a.full = dfixed_const(1000);
9263         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9264         line_time.full = dfixed_div(line_time, a);
9265         bpp.full = dfixed_const(wm->bytes_per_pixel);
9266         src_width.full = dfixed_const(wm->src_width);
9267         bandwidth.full = dfixed_mul(src_width, bpp);
9268         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9269         bandwidth.full = dfixed_div(bandwidth, line_time);
9270
9271         return dfixed_trunc(bandwidth);
9272 }
9273
9274 /**
9275  * dce8_latency_watermark - get the latency watermark
9276  *
9277  * @wm: watermark calculation data
9278  *
9279  * Calculate the latency watermark (CIK).
9280  * Used for display watermark bandwidth calculations
9281  * Returns the latency watermark in ns
9282  */
9283 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9284 {
9285         /* First calculate the latency in ns */
9286         u32 mc_latency = 2000; /* 2000 ns. */
9287         u32 available_bandwidth = dce8_available_bandwidth(wm);
9288         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9289         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9290         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9291         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9292                 (wm->num_heads * cursor_line_pair_return_time);
9293         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9294         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9295         u32 tmp, dmif_size = 12288;
9296         fixed20_12 a, b, c;
9297
9298         if (wm->num_heads == 0)
9299                 return 0;
9300
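             /* downscaling (vsc > 1), many vertical taps, or interlaced
              * scanout all require more source lines to be fetched per
              * destination line.
              */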
9301         a.full = dfixed_const(2);
9302         b.full = dfixed_const(1);
9303         if ((wm->vsc.full > a.full) ||
9304             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9305             (wm->vtaps >= 5) ||
9306             ((wm->vsc.full >= a.full) && wm->interlaced))
9307                 max_src_lines_per_dst_line = 4;
9308         else
9309                 max_src_lines_per_dst_line = 2;
9310
9311         a.full = dfixed_const(available_bandwidth);
9312         b.full = dfixed_const(wm->num_heads);
9313         a.full = dfixed_div(a, b);
9314
9315         b.full = dfixed_const(mc_latency + 512);
9316         c.full = dfixed_const(wm->disp_clk);
9317         b.full = dfixed_div(b, c);
9318
9319         c.full = dfixed_const(dmif_size);
9320         b.full = dfixed_div(c, b);
9321
9322         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9323
9324         b.full = dfixed_const(1000);
9325         c.full = dfixed_const(wm->disp_clk);
9326         b.full = dfixed_div(c, b);
9327         c.full = dfixed_const(wm->bytes_per_pixel);
9328         b.full = dfixed_mul(b, c);
9329
9330         lb_fill_bw = min(tmp, dfixed_trunc(b));
9331
9332         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9333         b.full = dfixed_const(1000);
9334         c.full = dfixed_const(lb_fill_bw);
9335         b.full = dfixed_div(c, b);
9336         a.full = dfixed_div(a, b);
9337         line_fill_time = dfixed_trunc(a);
9338
9339         if (line_fill_time < wm->active_time)
9340                 return latency;
9341         else
9342                 return latency + (line_fill_time - wm->active_time);
9343
9344 }
9345
9346 /**
9347  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9348  * average and available dram bandwidth
9349  *
9350  * @wm: watermark calculation data
9351  *
9352  * Check if the display average bandwidth fits in the display
9353  * dram bandwidth (CIK).
9354  * Used for display watermark bandwidth calculations
9355  * Returns true if the display fits, false if not.
9356  */
9357 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9358 {
9359         if (dce8_average_bandwidth(wm) <=
9360             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9361                 return true;
9362         else
9363                 return false;
9364 }
9365
9366 /**
9367  * dce8_average_bandwidth_vs_available_bandwidth - check
9368  * average and available bandwidth
9369  *
9370  * @wm: watermark calculation data
9371  *
9372  * Check if the display average bandwidth fits in the display
9373  * available bandwidth (CIK).
9374  * Used for display watermark bandwidth calculations
9375  * Returns true if the display fits, false if not.
9376  */
9377 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9378 {
9379         if (dce8_average_bandwidth(wm) <=
9380             (dce8_available_bandwidth(wm) / wm->num_heads))
9381                 return true;
9382         else
9383                 return false;
9384 }
9385
9386 /**
9387  * dce8_check_latency_hiding - check latency hiding
9388  *
9389  * @wm: watermark calculation data
9390  *
9391  * Check latency hiding (CIK).
9392  * Used for display watermark bandwidth calculations
9393  * Returns true if the display fits, false if not.
9394  */
9395 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9396 {
9397         u32 lb_partitions = wm->lb_size / wm->src_width;
9398         u32 line_time = wm->active_time + wm->blank_time;
9399         u32 latency_tolerant_lines;
9400         u32 latency_hiding;
9401         fixed20_12 a;
9402
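             /* the display can tolerate latency for roughly
              * latency_tolerant_lines full line times plus the blanking
              * interval before the line buffer runs dry.
              */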
9403         a.full = dfixed_const(1);
9404         if (wm->vsc.full > a.full)
9405                 latency_tolerant_lines = 1;
9406         else {
9407                 if (lb_partitions <= (wm->vtaps + 1))
9408                         latency_tolerant_lines = 1;
9409                 else
9410                         latency_tolerant_lines = 2;
9411         }
9412
9413         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9414
9415         if (dce8_latency_watermark(wm) <= latency_hiding)
9416                 return true;
9417         else
9418                 return false;
9419 }
9420
9421 /**
9422  * dce8_program_watermarks - program display watermarks
9423  *
9424  * @rdev: radeon_device pointer
9425  * @radeon_crtc: the selected display controller
9426  * @lb_size: line buffer size
9427  * @num_heads: number of display controllers in use
9428  *
9429  * Calculate and program the display watermarks for the
9430  * selected display controller (CIK).
9431  */
9432 static void dce8_program_watermarks(struct radeon_device *rdev,
9433                                     struct radeon_crtc *radeon_crtc,
9434                                     u32 lb_size, u32 num_heads)
9435 {
9436         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9437         struct dce8_wm_params wm_low, wm_high;
9438         u32 pixel_period;
9439         u32 line_time = 0;
9440         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9441         u32 tmp, wm_mask;
9442
9443         if (radeon_crtc->base.enabled && num_heads && mode) {
9444                 pixel_period = 1000000 / (u32)mode->clock;
9445                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9446
9447                 /* watermark for high clocks */
9448                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9449                     rdev->pm.dpm_enabled) {
9450                         wm_high.yclk =
9451                                 radeon_dpm_get_mclk(rdev, false) * 10;
9452                         wm_high.sclk =
9453                                 radeon_dpm_get_sclk(rdev, false) * 10;
9454                 } else {
9455                         wm_high.yclk = rdev->pm.current_mclk * 10;
9456                         wm_high.sclk = rdev->pm.current_sclk * 10;
9457                 }
9458
9459                 wm_high.disp_clk = mode->clock;
9460                 wm_high.src_width = mode->crtc_hdisplay;
9461                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9462                 wm_high.blank_time = line_time - wm_high.active_time;
9463                 wm_high.interlaced = false;
9464                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9465                         wm_high.interlaced = true;
9466                 wm_high.vsc = radeon_crtc->vsc;
9467                 wm_high.vtaps = 1;
9468                 if (radeon_crtc->rmx_type != RMX_OFF)
9469                         wm_high.vtaps = 2;
9470                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9471                 wm_high.lb_size = lb_size;
9472                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9473                 wm_high.num_heads = num_heads;
9474
9475                 /* set for high clocks */
9476                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9477
9478                 /* possibly force display priority to high */
9479                 /* should really do this at mode validation time... */
9480                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9481                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9482                     !dce8_check_latency_hiding(&wm_high) ||
9483                     (rdev->disp_priority == 2)) {
9484                         DRM_DEBUG_KMS("force priority to high\n");
9485                 }
9486
9487                 /* watermark for low clocks */
9488                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9489                     rdev->pm.dpm_enabled) {
9490                         wm_low.yclk =
9491                                 radeon_dpm_get_mclk(rdev, true) * 10;
9492                         wm_low.sclk =
9493                                 radeon_dpm_get_sclk(rdev, true) * 10;
9494                 } else {
9495                         wm_low.yclk = rdev->pm.current_mclk * 10;
9496                         wm_low.sclk = rdev->pm.current_sclk * 10;
9497                 }
9498
9499                 wm_low.disp_clk = mode->clock;
9500                 wm_low.src_width = mode->crtc_hdisplay;
9501                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9502                 wm_low.blank_time = line_time - wm_low.active_time;
9503                 wm_low.interlaced = false;
9504                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9505                         wm_low.interlaced = true;
9506                 wm_low.vsc = radeon_crtc->vsc;
9507                 wm_low.vtaps = 1;
9508                 if (radeon_crtc->rmx_type != RMX_OFF)
9509                         wm_low.vtaps = 2;
9510                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9511                 wm_low.lb_size = lb_size;
9512                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9513                 wm_low.num_heads = num_heads;
9514
9515                 /* set for low clocks */
9516                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9517
9518                 /* possibly force display priority to high */
9519                 /* should really do this at mode validation time... */
9520                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9521                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9522                     !dce8_check_latency_hiding(&wm_low) ||
9523                     (rdev->disp_priority == 2)) {
9524                         DRM_DEBUG_KMS("force priority to high\n");
9525                 }
9526         }
9527
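             /* program watermark set A with the high clock value and set B
              * with the low clock value, then restore whichever set was
              * originally selected.
              */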
9528         /* select wm A */
9529         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9530         tmp = wm_mask;
9531         tmp &= ~LATENCY_WATERMARK_MASK(3);
9532         tmp |= LATENCY_WATERMARK_MASK(1);
9533         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9534         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9535                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9536                 LATENCY_HIGH_WATERMARK(line_time)));
9537         /* select wm B */
9538         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9539         tmp &= ~LATENCY_WATERMARK_MASK(3);
9540         tmp |= LATENCY_WATERMARK_MASK(2);
9541         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9542         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9543                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9544                 LATENCY_HIGH_WATERMARK(line_time)));
9545         /* restore original selection */
9546         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9547
9548         /* save values for DPM */
9549         radeon_crtc->line_time = line_time;
9550         radeon_crtc->wm_high = latency_watermark_a;
9551         radeon_crtc->wm_low = latency_watermark_b;
9552 }
9553
9554 /**
9555  * dce8_bandwidth_update - program display watermarks
9556  *
9557  * @rdev: radeon_device pointer
9558  *
9559  * Calculate and program the display watermarks and line
9560  * buffer allocation (CIK).
9561  */
9562 void dce8_bandwidth_update(struct radeon_device *rdev)
9563 {
9564         struct drm_display_mode *mode = NULL;
9565         u32 num_heads = 0, lb_size;
9566         int i;
9567
9568         if (!rdev->mode_info.mode_config_initialized)
9569                 return;
9570
9571         radeon_update_display_priority(rdev);
9572
9573         for (i = 0; i < rdev->num_crtc; i++) {
9574                 if (rdev->mode_info.crtcs[i]->base.enabled)
9575                         num_heads++;
9576         }
9577         for (i = 0; i < rdev->num_crtc; i++) {
9578                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9579                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9580                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9581         }
9582 }
9583
9584 /**
9585  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9586  *
9587  * @rdev: radeon_device pointer
9588  *
9589  * Fetches a GPU clock counter snapshot (CIK).
9590  * Returns the 64 bit clock counter snapshot.
9591  */
9592 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9593 {
9594         uint64_t clock;
9595
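             /* latch the free running counter, then read the 64-bit value as
              * two 32-bit halves; the mutex keeps the latch/read sequence
              * atomic.
              */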
9596         mutex_lock(&rdev->gpu_clock_mutex);
9597         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9598         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9599                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9600         mutex_unlock(&rdev->gpu_clock_mutex);
9601         return clock;
9602 }
9603
9604 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9605                               u32 cntl_reg, u32 status_reg)
9606 {
9607         int r, i;
9608         struct atom_clock_dividers dividers;
9609         uint32_t tmp;
9610
9611         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9612                                            clock, false, &dividers);
9613         if (r)
9614                 return r;
9615
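             /* program the new post divider and poll until the clock reports
              * a stable status.
              */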
9616         tmp = RREG32_SMC(cntl_reg);
9617         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9618         tmp |= dividers.post_divider;
9619         WREG32_SMC(cntl_reg, tmp);
9620
9621         for (i = 0; i < 100; i++) {
9622                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9623                         break;
9624                 mdelay(10);
9625         }
9626         if (i == 100)
9627                 return -ETIMEDOUT;
9628
9629         return 0;
9630 }
9631
9632 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9633 {
9634         int r;
9635
9636         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9637         if (r)
9638                 return r;
9639
9640         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9641         return r;
9642 }
9643
9644 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9645 {
9646         int r, i;
9647         struct atom_clock_dividers dividers;
9648         u32 tmp;
9649
9650         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9651                                            ecclk, false, &dividers);
9652         if (r)
9653                 return r;
9654
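             /* wait for any pending ECLK change to settle, program the new
              * post divider, then wait again for the clock to report stable.
              */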
9655         for (i = 0; i < 100; i++) {
9656                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9657                         break;
9658                 mdelay(10);
9659         }
9660         if (i == 100)
9661                 return -ETIMEDOUT;
9662
9663         tmp = RREG32_SMC(CG_ECLK_CNTL);
9664         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9665         tmp |= dividers.post_divider;
9666         WREG32_SMC(CG_ECLK_CNTL, tmp);
9667
9668         for (i = 0; i < 100; i++) {
9669                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9670                         break;
9671                 mdelay(10);
9672         }
9673         if (i == 100)
9674                 return -ETIMEDOUT;
9675
9676         return 0;
9677 }
9678
9679 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9680 {
9681         struct pci_dev *root = rdev->pdev->bus->self;
9682         int bridge_pos, gpu_pos;
9683         u32 speed_cntl, mask, current_data_rate;
9684         int ret, i;
9685         u16 tmp16;
9686
9687         if (pci_is_root_bus(rdev->pdev->bus))
9688                 return;
9689
9690         if (radeon_pcie_gen2 == 0)
9691                 return;
9692
9693         if (rdev->flags & RADEON_IS_IGP)
9694                 return;
9695
9696         if (!(rdev->flags & RADEON_IS_PCIE))
9697                 return;
9698
9699         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9700         if (ret != 0)
9701                 return;
9702
9703         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9704                 return;
9705
9706         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9707         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9708                 LC_CURRENT_DATA_RATE_SHIFT;
9709         if (mask & DRM_PCIE_SPEED_80) {
9710                 if (current_data_rate == 2) {
9711                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9712                         return;
9713                 }
9714                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9715         } else if (mask & DRM_PCIE_SPEED_50) {
9716                 if (current_data_rate == 1) {
9717                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9718                         return;
9719                 }
9720                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9721         }
9722
9723         bridge_pos = pci_pcie_cap(root);
9724         if (!bridge_pos)
9725                 return;
9726
9727         gpu_pos = pci_pcie_cap(rdev->pdev);
9728         if (!gpu_pos)
9729                 return;
9730
9731         if (mask & DRM_PCIE_SPEED_80) {
9732                 /* re-try equalization if gen3 is not already enabled */
9733                 if (current_data_rate != 2) {
9734                         u16 bridge_cfg, gpu_cfg;
9735                         u16 bridge_cfg2, gpu_cfg2;
9736                         u32 max_lw, current_lw, tmp;
9737
9738                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9739                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9740
9741                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9742                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9743
9744                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9745                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9746
9747                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9748                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9749                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9750
9751                         if (current_lw < max_lw) {
9752                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9753                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9754                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9755                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9756                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9757                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9758                                 }
9759                         }
9760
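                             /* retry equalization up to 10 times: quiesce the
                              * link, redo equalization, then restore the saved
                              * link control settings on both the bridge and
                              * the GPU.
                              */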
9761                         for (i = 0; i < 10; i++) {
9762                                 /* check status */
9763                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9764                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9765                                         break;
9766
9767                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9768                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9769
9770                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9771                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9772
9773                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9774                                 tmp |= LC_SET_QUIESCE;
9775                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9776
9777                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9778                                 tmp |= LC_REDO_EQ;
9779                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9780
9781                                 mdelay(100);
9782
9783                                 /* linkctl */
9784                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9785                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9786                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9787                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9788
9789                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9790                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9791                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9792                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9793
9794                                 /* linkctl2 */
9795                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9796                                 tmp16 &= ~((1 << 4) | (7 << 9));
9797                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9798                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9799
9800                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9801                                 tmp16 &= ~((1 << 4) | (7 << 9));
9802                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9803                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9804
9805                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9806                                 tmp &= ~LC_SET_QUIESCE;
9807                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9808                         }
9809                 }
9810         }
9811
9812         /* set the link speed */
9813         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9814         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9815         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9816
9817         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9818         tmp16 &= ~0xf;
9819         if (mask & DRM_PCIE_SPEED_80)
9820                 tmp16 |= 3; /* gen3 */
9821         else if (mask & DRM_PCIE_SPEED_50)
9822                 tmp16 |= 2; /* gen2 */
9823         else
9824                 tmp16 |= 1; /* gen1 */
9825         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9826
9827         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9828         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9829         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9830
9831         for (i = 0; i < rdev->usec_timeout; i++) {
9832                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9833                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9834                         break;
9835                 udelay(1);
9836         }
9837 }
9838
9839 static void cik_program_aspm(struct radeon_device *rdev)
9840 {
9841         u32 data, orig;
9842         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9843         bool disable_clkreq = false;
9844
9845         if (radeon_aspm == 0)
9846                 return;
9847
9848         /* XXX double check IGPs */
9849         if (rdev->flags & RADEON_IS_IGP)
9850                 return;
9851
9852         if (!(rdev->flags & RADEON_IS_PCIE))
9853                 return;
9854
9855         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9856         data &= ~LC_XMIT_N_FTS_MASK;
9857         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9858         if (orig != data)
9859                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9860
9861         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9862         data |= LC_GO_TO_RECOVERY;
9863         if (orig != data)
9864                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9865
9866         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9867         data |= P_IGNORE_EDB_ERR;
9868         if (orig != data)
9869                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9870
9871         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9872         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9873         data |= LC_PMI_TO_L1_DIS;
9874         if (!disable_l0s)
9875                 data |= LC_L0S_INACTIVITY(7);
9876
9877         if (!disable_l1) {
9878                 data |= LC_L1_INACTIVITY(7);
9879                 data &= ~LC_PMI_TO_L1_DIS;
9880                 if (orig != data)
9881                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9882
9883                 if (!disable_plloff_in_l1) {
9884                         bool clk_req_support;
9885
9886                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9887                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9888                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9889                         if (orig != data)
9890                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9891
9892                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9893                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9894                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9895                         if (orig != data)
9896                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9897
9898                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9899                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9900                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9901                         if (orig != data)
9902                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9903
9904                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9905                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9906                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9907                         if (orig != data)
9908                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9909
9910                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9911                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9912                         data |= LC_DYN_LANES_PWR_STATE(3);
9913                         if (orig != data)
9914                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9915
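                             /* only enable CLKREQ# based savings when the
                              * upstream bridge advertises clock power
                              * management (PCI_EXP_LNKCAP_CLKPM).
                              */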
9916                         if (!disable_clkreq &&
9917                             !pci_is_root_bus(rdev->pdev->bus)) {
9918                                 struct pci_dev *root = rdev->pdev->bus->self;
9919                                 u32 lnkcap;
9920
9921                                 clk_req_support = false;
9922                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9923                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9924                                         clk_req_support = true;
9925                         } else {
9926                                 clk_req_support = false;
9927                         }
9928
9929                         if (clk_req_support) {
9930                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9931                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9932                                 if (orig != data)
9933                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9934
9935                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9936                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9937                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9938                                 if (orig != data)
9939                                         WREG32_SMC(THM_CLK_CNTL, data);
9940
9941                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9942                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9943                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9944                                 if (orig != data)
9945                                         WREG32_SMC(MISC_CLK_CTRL, data);
9946
9947                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9948                                 data &= ~BCLK_AS_XCLK;
9949                                 if (orig != data)
9950                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9951
9952                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9953                                 data &= ~FORCE_BIF_REFCLK_EN;
9954                                 if (orig != data)
9955                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9956
9957                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9958                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9959                                 data |= MPLL_CLKOUT_SEL(4);
9960                                 if (orig != data)
9961                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9962                         }
9963                 }
9964         } else {
9965                 if (orig != data)
9966                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9967         }
9968
9969         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9970         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9971         if (orig != data)
9972                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9973
9974         if (!disable_l0s) {
9975                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9976                 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9977                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9978                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9979                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9980                                 data &= ~LC_L0S_INACTIVITY_MASK;
9981                                 if (orig != data)
9982                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9983                         }
9984                 }
9985         }
9986 }