Add qemu 2.4.0
[kvmfornfv.git] / qemu / roms / SLOF / board-js2x / llfw / u4mem.c
1 /******************************************************************************
2  * Copyright (c) 2004, 2008 IBM Corporation
3  * All rights reserved.
4  * This program and the accompanying materials
5  * are made available under the terms of the BSD License
6  * which accompanies this distribution, and is available at
7  * http://www.opensource.org/licenses/bsd-license.php
8  *
9  * Contributors:
10  *     IBM Corporation - initial implementation
11  *****************************************************************************/
12 #include <stdint.h>
13 #include <hw.h>
14 #include <stdio.h>
15 #include "stage2.h"
16 #include <cpu.h>
17 #include <string.h>
18
19 /*
20  * compiler switches
21  *******************************************************************************
22  */
23 #define U4_DEBUG
24 #define U4_INFO
25 //#define U4_SHOW_REGS
26
27 int io_getchar(char *);
28
29 /*
30  * version info
31  */
32 static const uint32_t VER    = 2;
33 static const uint32_t SUBVER = 1;
34
35 /*
36  * local macros
37  *******************************************************************************
38  */
/*
 * single bit masks for 32 bit values
 * IBIT: Motorola/IBM bit enumeration (bit 0 is the most significant bit)
 * BIT : conventional enumeration (bit 0 is the least significant bit)
 * nr has to be in the range 0..31
 */
#define IBIT( nr )              ( ( (uint32_t) 0x80000000 ) >> ( nr ) )
#define BIT( nr )               ( ( (uint32_t) 0x1 ) << ( nr ) )
42
43 /*
44  * macros to detect the current board layout
45  */
46 #define IS_MAUI         ( ( load8_ci( 0xf4000682 ) >> 4 ) == 0 )
47 #define IS_BIMINI               ( ( load8_ci( 0xf4000682 ) >> 4 ) == 1 )
48 #define IS_KAUAI                ( ( load8_ci( 0xf4000682 ) >> 4 ) == 2 )
49
50 /*
51  * local constants
52  *******************************************************************************
53  */
54
55 /*
56  * u4 base address
57  */
58 #define U4_BASE_ADDR            ((uint64_t) 0xf8000000 )
59 #define u4reg( reg )            (U4_BASE_ADDR + (uint64_t) (reg))
60
61 /*
62  * I2C registers
63  */
64 #define I2C_MODE_R              u4reg(0x1000)
65 #define I2C_CTRL_R              u4reg(0x1010)
66 #define I2C_STAT_R              u4reg(0x1020)
67 #define I2C_ISR_R               u4reg(0x1030)
68 #define I2C_ADDR_R              u4reg(0x1050)
69 #define I2C_SUBA_R              u4reg(0x1060)
70 #define I2C_DATA_R              u4reg(0x1070)
71
72 /*
73  * clock control registers & needed bits/masks
74  */
75 #define ClkCntl_R               u4reg(0x0800)
76 #define PLL2Cntl_R              u4reg(0x0860)
77
78 /*
79  * clock control bits & masks
80  */
81 #define CLK_DDR_CLK_MSK         (IBIT(11) | IBIT(12) | IBIT(13))
82
83 /*
84  * memory controller registers
85  */
86 #define RASTimer0_R             u4reg(0x2030)
87 #define RASTimer1_R             u4reg(0x2040)
88 #define CASTimer0_R             u4reg(0x2050)
89 #define CASTimer1_R             u4reg(0x2060)
90 #define MemRfshCntl_R           u4reg(0x2070)
91 #define MemProgCntl_R           u4reg(0x20b0)
92 #define Dm0Cnfg_R               u4reg(0x2200)
93 #define Dm1Cnfg_R               u4reg(0x2210)
94 #define Dm2Cnfg_R               u4reg(0x2220)
95 #define Dm3Cnfg_R               u4reg(0x2230)
96 #define MemWrQCnfg_R            u4reg(0x2270)
97 #define MemArbWt_R              u4reg(0x2280)
98 #define UsrCnfg_R               u4reg(0x2290)
99 #define MemRdQCnfg_R            u4reg(0x22a0)
100 #define MemQArb_R               u4reg(0x22b0)
101 #define MemRWArb_R              u4reg(0x22c0)
102 #define MemBusCnfg_R            u4reg(0x22d0)
103 #define MemBusCnfg2_R           u4reg(0x22e0)
104 #define ODTCntl_R               u4reg(0x23a0)
105 #define MemModeCntl_R           u4reg(0x2500)
106 #define MemPhyModeCntl_R        u4reg(0x2880)
107 #define CKDelayL_R              u4reg(0x2890)
108 #define CKDelayU_R              u4reg(0x28a0)
109 #define IOPadCntl_R             u4reg(0x29a0)
110 #define ByteWrClkDelC0B00_R     u4reg(0x2800)
111 #define ByteWrClkDelC0B01_R     u4reg(0x2810)
112 #define ByteWrClkDelC0B02_R     u4reg(0x2820)
113 #define ByteWrClkDelC0B03_R     u4reg(0x2830)
114 #define ByteWrClkDelC0B04_R     u4reg(0x2900)
115 #define ByteWrClkDelC0B05_R     u4reg(0x2910)
116 #define ByteWrClkDelC0B06_R     u4reg(0x2920)
117 #define ByteWrClkDelC0B07_R     u4reg(0x2930)
118 #define ByteWrClkDelC0B16_R     u4reg(0x2980)
119 #define ByteWrClkDelC0B08_R     u4reg(0x2a00)
120 #define ByteWrClkDelC0B09_R     u4reg(0x2a10)
121 #define ByteWrClkDelC0B10_R     u4reg(0x2a20)
122 #define ByteWrClkDelC0B11_R     u4reg(0x2a30)
123 #define ByteWrClkDelC0B12_R     u4reg(0x2b00)
124 #define ByteWrClkDelC0B13_R     u4reg(0x2b10)
125 #define ByteWrClkDelC0B14_R     u4reg(0x2b20)
126 #define ByteWrClkDelC0B15_R     u4reg(0x2b30)
127 #define ByteWrClkDelC0B17_R     u4reg(0x2b80)
128 #define ReadStrobeDelC0B00_R    u4reg(0x2840)
129 #define ReadStrobeDelC0B01_R    u4reg(0x2850)
130 #define ReadStrobeDelC0B02_R    u4reg(0x2860)
131 #define ReadStrobeDelC0B03_R    u4reg(0x2870)
132 #define ReadStrobeDelC0B04_R    u4reg(0x2940)
133 #define ReadStrobeDelC0B05_R    u4reg(0x2950)
134 #define ReadStrobeDelC0B06_R    u4reg(0x2960)
135 #define ReadStrobeDelC0B07_R    u4reg(0x2970)
136 #define ReadStrobeDelC0B16_R    u4reg(0x2990)
137 #define ReadStrobeDelC0B08_R    u4reg(0x2a40)
138 #define ReadStrobeDelC0B09_R    u4reg(0x2a50)
139 #define ReadStrobeDelC0B10_R    u4reg(0x2a60)
140 #define ReadStrobeDelC0B11_R    u4reg(0x2a70)
141 #define ReadStrobeDelC0B12_R    u4reg(0x2b40)
142 #define ReadStrobeDelC0B13_R    u4reg(0x2b50)
143 #define ReadStrobeDelC0B14_R    u4reg(0x2b60)
144 #define ReadStrobeDelC0B15_R    u4reg(0x2b70)
145 #define ReadStrobeDelC0B17_R    u4reg(0x2b90)
146 #define MemInit00_R             u4reg(0x2100)
147 #define MemInit01_R             u4reg(0x2110)
148 #define MemInit02_R             u4reg(0x2120)
149 #define MemInit03_R             u4reg(0x2130)
150 #define MemInit04_R             u4reg(0x2140)
151 #define MemInit05_R             u4reg(0x2150)
152 #define MemInit06_R             u4reg(0x2160)
153 #define MemInit07_R             u4reg(0x2170)
154 #define MemInit08_R             u4reg(0x2180)
155 #define MemInit09_R             u4reg(0x2190)
156 #define MemInit10_R             u4reg(0x21a0)
157 #define MemInit11_R             u4reg(0x21b0)
158 #define MemInit12_R             u4reg(0x21c0)
159 #define MemInit13_R             u4reg(0x21d0)
160 #define MemInit14_R             u4reg(0x21e0)
161 #define MemInit15_R             u4reg(0x21f0)
162 #define CalConf0_R              u4reg(0x29b0)
163 #define CalConf1_R              u4reg(0x29c0)
164 #define MeasStatusC0_R          u4reg(0x28f0)
165 #define MeasStatusC1_R          u4reg(0x29f0)
166 #define MeasStatusC2_R          u4reg(0x2af0)
167 #define MeasStatusC3_R          u4reg(0x2bf0)
168 #define CalC0_R                 u4reg(0x28e0)
169 #define CalC1_R                 u4reg(0x29e0)
170 #define CalC2_R                 u4reg(0x2ae0)
171 #define CalC3_R                 u4reg(0x2be0)
172 #define RstLdEnVerniersC0_R     u4reg(0x28d0)
173 #define RstLdEnVerniersC1_R     u4reg(0x29d0)
174 #define RstLdEnVerniersC2_R     u4reg(0x2ad0)
175 #define RstLdEnVerniersC3_R     u4reg(0x2bd0)
176 #define ExtMuxVernier0_R        u4reg(0x28b0)
177 #define ExtMuxVernier1_R        u4reg(0x28c0)
178 #define OCDCalCmd_R             u4reg(0x2300)
179 #define OCDCalCntl_R            u4reg(0x2310)
180 #define MCCR_R                  u4reg(0x2440)
181 #define MSRSR_R                 u4reg(0x2410)
182 #define MSRER_R                 u4reg(0x2420)
183 #define MSPR_R                  u4reg(0x2430)
184 #define MSCR_R                  u4reg(0x2400)
185 #define MEAR0_R                 u4reg(0x2460)
186 #define MEAR1_R                 u4reg(0x2470)
187 #define MESR_R                  u4reg(0x2480)
188 #define MRSRegCntl_R            u4reg(0x20c0)
189 #define EMRSRegCntl_R           u4reg(0x20d0)
190 #define APIMemRdCfg_R           u4reg(0x30090)
191 #define APIExcp_R               u4reg(0x300a0)
192
/*
 * common return values
 *
 * The negative values are parenthesized so that the macros expand safely
 * inside any expression.
 * Note: RET_ERR and RET_ACERR_CE share the same numeric value.
 */
#define RET_OK                  ( 0 )
#define RET_ERR                 ( -1 )
#define RET_ACERR_CE            ( -1 )
#define RET_ACERR_UEWT          ( -2 )
#define RET_ACERR_UE            ( -3 )
201
202 /*
203  * 'DIMM slot populated' indicator
204  */
205 #define SL_POP                  1
206
207 /*
208  * spd buffer size
209  */
210 #define SPD_BUF_SIZE            0x40
211
212 /*
213  * maximum number of DIMM banks & DIMM groups
214  */
215 #define NUM_SLOTS               8
216 #define NUM_BANKS               ( NUM_SLOTS / 2 )
217 #define MAX_DGROUPS             ( NUM_SLOTS / 2 )
218 #define SLOT_ADJ()              ( ( IS_MAUI ) ? NUM_SLOTS / 4 : NUM_SLOTS / 2 )
219
220 /*
221  * values needed for auto calibration
222  */
223 #define MAX_DRANKS              NUM_SLOTS
224 #define MAX_BLANE               18
225 #define MAX_RMD                 0xf
226
227 /*
228  * maximum number of supported CAS latencies
229  */
230 #define NUM_CL                  3
231
232 /*
233  * min/max supported CL values by U4
234  */
235 #define U4_MIN_CL               3
236 #define U4_MAX_CL               5
237
238 /*
239  * DIMM constants
240  */
241 #define DIMM_TYPE_MSK           BIT(0)
242 #define DIMM_ORG_x4             BIT(0)
243 #define DIMM_ORG_x8             BIT(1)
244 #define DIMM_ORG_x16            BIT(2)
245 #define DIMM_ORG_MIXx8x16       BIT(30)
246 #define DIMM_ORG_UNKNOWN        0
247 #define DIMM_WIDTH              72
248 #define DIMM_BURSTLEN_4         BIT(2)
249
/*
 * L2 cache size
 * (parenthesized so that the cast expression expands as one operand,
 *  e.g. as operand of sizeof)
 */
#define L2_CACHE_SIZE           ( (uint32_t) 0x100000 )
254
255 /*
256  * scrub types
257  */
258 #define IMMEDIATE_SCRUB                 IBIT(0)
259 #define IMMEDIATE_SCRUB_WITH_FILL       ( IBIT(0) | IBIT(1) )
260 #define BACKGROUND_SCRUB                ( IBIT(1) | ( 0x29 << 16 ) )
261
262 /*
263  * I2C starting slave addresses of the DIMM banks
264  */
265 #define I2C_START               0x50
266
/*
 * Index to the speed dependent DIMM settings
 * (NUM_SPEED_IDX is the number of speed indices)
 */
enum
{
        SPEED_IDX_400 = 0,
        SPEED_IDX_533 = 1,
        SPEED_IDX_667 = 2,
        NUM_SPEED_IDX = 3
};
277
278 /*
279  * number of read/write strobes of the U4
280  */
281 #define NUM_STROBES             18
282
283 /*
284  * 2GB hole definition
285  */
286 static const uint64_t _2GB = (uint64_t) 0x80000000;
287
288 /*
289  * local types
290  *******************************************************************************
291  */
292 /*
293  * DIMM definition
294  */
295 typedef struct
296 {
297         uint32_t m_pop_u32;             // set if bank is populated
298         uint32_t m_bank_u32;            // bank number
299         uint32_t m_clmsk_u32;           // mask of supported CAS latencies
300         uint32_t m_clcnt_u32;           // number of supporetd CAS latencies
301         uint32_t m_clval_pu32[NUM_CL];  // values of supporeted CAS latencies
302         uint32_t m_speed_pu32[NUM_CL];  // speed (Mhz) at CAS latency of same index
303         uint32_t m_size_u32;            // chip size in Mb
304         uint32_t m_rank_u32;            // # of ranks, total size = chip size*rank
305         uint32_t m_orgmsk_u32;          // data organisation (x4, x8, x16) (mask)
306         uint32_t m_orgval_u32;          // data organisation (value)
307         uint32_t m_width_u32;           // data width
308         uint32_t m_ecc_u32;             // set if ecc
309         uint32_t m_type_u32;            // rdimm or udimm
310         uint32_t m_burst_u32;           // supported burst lengths
311         uint32_t m_bankcnt_u32;         // number of banks
312
313         /*
314          * the following timing values are all in 1/100ns
315          */
316         uint32_t m_tCK_pu32[NUM_CL];
317         uint32_t m_tRAS_u32;
318         uint32_t m_tRTP_u32;
319         uint32_t m_tRP_u32;
320         uint32_t m_tWR_u32;
321         uint32_t m_tRRD_u32;
322         uint32_t m_tRC_u32;
323         uint32_t m_tRCD_u32;
324         uint32_t m_tWTR_u32;
325         uint32_t m_tREF_u32;
326         uint32_t m_tRFC_u32;
327 }       dimm_t;
328
329 /*
330  * DIMM group definition
331  */
332 typedef struct
333 {
334         uint32_t  m_size_u32;           // group size in MB
335         uint32_t  m_start_u32;          // in 128Mb granularity
336         uint32_t  m_end_u32;            // in 128Mb granularity
337         uint32_t  m_ss_u32;             // single sided/double sided
338         uint32_t  m_csmode_u32;         // selected CS mode for this group
339         uint32_t  m_add2g_u32;
340         uint32_t  m_sub2g_u32;
341         uint32_t  m_memmd_u32;          // selected mem mode for this group
342         uint32_t  m_dcnt_u32;           // number of DIMMs in group
343         dimm_t   *m_dptr[NUM_SLOTS];
344 }       dgroup_t;
345
/*
 * auto calibration result structure
 * (register values named after the U4 registers MemBusCnfg, MemBusCnfg2
 *  and RstLdEnVerniersC0..C3)
 */
typedef struct
{
        uint32_t m_MemBusCnfg_u32;
        uint32_t m_MemBusCnfg2_u32;
        uint32_t m_RstLdEnVerniers_pu32[4];     // one entry per RstLdEnVerniersCx
} auto_calib_t;
355
/*
 * ECC error structure
 * (error code, error counters and location of the erroneous access)
 */
typedef struct
{
        int32_t  m_err_i32;             // error code
        uint32_t m_uecnt_u32;           // count of uncorrectable errors
        uint32_t m_cecnt_u32;           // count of correctable errors
        uint32_t m_rank_u32;            // rank the error occurred in
        uint32_t m_col_u32;             // column the error occurred in
        uint32_t m_row_u32;             // row the error occurred in
        uint32_t m_bank_u32;            // bank the error occurred in
} eccerror_t;
369
370 /*
371  * U4 register setup structure
372  */
373 typedef struct
374 {
375         /*
376          * external MUX delays
377          */
378         uint32_t RRMux;
379         uint32_t WRMux;
380         uint32_t WWMux;
381         uint32_t RWMux;
382
383         /*
384          * default Wr/Rd Queue & Arbiter register settings
385          */
386         uint32_t MemRdQCnfg;
387         uint32_t MemWrQCnfg;
388         uint32_t MemQArb;
389         uint32_t MemRWArb;
390
391         /*
392          * misc fixed register values
393          */
394         uint32_t ODTCntl;
395         uint32_t IOPadCntl;
396         uint32_t MemPhyModeCntl;
397         uint32_t OCDCalCntl;
398         uint32_t OCDCalCmd;
399         uint32_t CKDelayL;
400         uint32_t CKDelayU;
401         uint32_t MemBusCnfg;
402         uint32_t CAS1Dly0;
403         uint32_t CAS1Dly1;
404         uint32_t ByteWrClkDel[NUM_STROBES];
405         uint32_t ReadStrobeDel[NUM_STROBES];
406 } reg_statics_t;
407
408 /*
409  * local variables
410  *******************************************************************************
411  */
static dimm_t    m_dimm[NUM_SLOTS];     // DIMM descriptors, indexed by slot (see ddr2_readSPDs)
static dimm_t    m_gendimm;
static uint32_t  m_dcnt_u32;            // number of entries used in m_dptr
static dimm_t   *m_dptr[NUM_SLOTS];     // populated DIMMs, each entry points into m_dimm
static uint32_t  m_bankoff_u32;         // bit mask, bit n set: bank n is switched off
static uint32_t  m_bankpop_u32[NUM_BANKS]; // per bank: != 0 if both slots of the bank are populated
static uint32_t  m_dclidx_u32;
static uint32_t  m_dgrcnt_u32;          // presumably the number of used DIMM groups - TODO confirm
static dgroup_t  m_dgroup[MAX_DGROUPS];
static dgroup_t *m_dgrptr[MAX_DGROUPS];
static uint64_t  m_memsize_u64; // memsize in bytes
423
424 /*
425  * local functions
426  *******************************************************************************
427  */
/*
 * progress indicator: every call overwrites the previously printed
 * character (backspace) with the next one out of | / - \
 */
static void
progbar( void )
{
        static const char l_spin_pc[] = "|/-\\";
        static uint32_t   l_pos_u32   = 0;

        printf( "\b%c", l_spin_pc[l_pos_u32] );

        // step forward and wrap around at the terminating 0
        l_pos_u32++;

        if( l_spin_pc[l_pos_u32] == '\0' ) {
                l_pos_u32 = 0;
        }

}
442
/*
 * read-modify-write of a 32 bit register (load32_ci/store32_ci):
 * sets all bits of r which are set in m
 */
static void
or32_ci( uint64_t r, uint32_t m )
{
        const uint32_t l_old_u32 = load32_ci( r );

        store32_ci( r, l_old_u32 | m );
}
452
/*
 * read-modify-write of a 32 bit register (load32_ci/store32_ci):
 * keeps only those bits of r which are set in m
 */
static void
and32_ci( uint64_t r, uint32_t m )
{
        const uint32_t l_old_u32 = load32_ci( r );

        store32_ci( r, l_old_u32 & m );
}
462
/*
 * busy wait: counts f_wait_u64 down to zero
 * (the counter is volatile, so the loop is not optimized away)
 */
static void
dly( uint64_t volatile f_wait_u64 )
{
        for( ; f_wait_u64 != 0; f_wait_u64-- ) {
                // nothing to do but counting
        }
}
470
471 /*
472  * local i2c access functions
473  */
474 static void
475 i2c_term( void )
476 {
477         uint32_t l_stat_u32;
478
479         /*
480          * clear out all pending int's and wait
481          * for the stop condition to occur
482          */
483         do {
484                 l_stat_u32 = load32_ci( I2C_ISR_R );
485                 store32_ci( I2C_ISR_R, l_stat_u32 );
486         } while( ( l_stat_u32 & IBIT(29) ) == 0 );
487
488 }
489
490 static int32_t
491 i2c_read( uint32_t f_addr_u32, uint32_t f_suba_u32, uint8_t *f_buf_pu08, uint32_t f_len_u32 )
492 {
493         uint32_t  l_val_u32;
494         int32_t   l_ret_i32 = 1;
495
496         /*
497          * parameter check
498          */
499         if( ( f_addr_u32 > (uint32_t) 0x7f ) ||
500             ( f_suba_u32 > (uint32_t) 0xff ) ||
501             ( f_len_u32 == (uint32_t) 0x00 ) ) {
502                 return RET_ERR;
503         }
504
505         /*
506          * set I2C Interface to combined mode
507          */
508         store32_ci( I2C_MODE_R, IBIT(28) | IBIT(29) );
509
510         /*
511          * set address, subaddress & read mode
512          */
513         store32_ci( I2C_ADDR_R, ( f_addr_u32 << 1 ) | (uint32_t) 0x1 );
514         store32_ci( I2C_SUBA_R, f_suba_u32 );
515
516         /*
517          * start address transmission phase
518          */
519         store32_ci( I2C_CTRL_R, IBIT(30) );
520
521         /*
522          * wait for address transmission to finish
523          */
524         do {
525                 l_val_u32 = load32_ci( I2C_ISR_R );
526         } while( ( l_val_u32 & IBIT(30) ) == 0 );
527
528         /*
529          * check for success
530          */
531         if( ( load32_ci( I2C_STAT_R ) & IBIT(30) ) == 0 ) {
532                 i2c_term();
533                 return RET_ERR;
534         } else {
535                 // send ack
536                 store32_ci( I2C_CTRL_R, IBIT(31) );
537                 // clear int
538                 store32_ci( I2C_ISR_R, IBIT(30) );
539         }
540
541         /*
542          * read data
543          */
544         while( l_ret_i32 > 0 ) {
545                 l_val_u32 = load32_ci( I2C_ISR_R );
546
547                 if( ( l_val_u32 & IBIT(31) ) != 0 ) {
548                         // data was received
549                         *f_buf_pu08 = ( uint8_t ) load32_ci( I2C_DATA_R );
550
551                         f_buf_pu08++;
552                         f_len_u32--;
553
554                         /*
555                          * continue when there is more data to read or
556                          * exit if not
557                          */
558                         if( f_len_u32 != 0 ) {
559                                 // send ack
560                                 store32_ci( I2C_CTRL_R, IBIT(31) );
561                                 // clear int
562                                 store32_ci( I2C_ISR_R, IBIT(31) );
563                         } else {
564                                 // send nack
565                                 store32_ci( I2C_CTRL_R, 0 );
566                                 // set exit flag
567                                 l_ret_i32 = RET_OK;
568                         }
569
570                 } else if( ( l_val_u32 & IBIT(29) ) != 0 ) {
571                         // early stop condition
572                         // set exit flag
573                         l_ret_i32 = RET_ERR;
574                 }
575
576         };
577
578         i2c_term();
579
580         return( l_ret_i32 );
581 }
582
583 static uint32_t
584 i2c_get_slot( uint32_t i2c_addr )
585 {
586         uint32_t slot;
587
588         slot = ( i2c_addr - I2C_START ) / 2;
589
590         if( ( i2c_addr & 0x1 ) != 0 ) {
591                 slot += SLOT_ADJ();
592         }
593
594         return slot;
595 }
596
597 /*
598  * 'serial presence detect' interpretation functions
599  */
/*
 * number of ranks of the DIMM:
 * the lower two bits of SPD byte 5, plus one (result is 1..4)
 */
static uint32_t
ddr2_get_dimm_rank( uint8_t *f_spd_pu08 )
{
        const uint32_t l_byte_u32 = (uint32_t) f_spd_pu08[5];

        return ( l_byte_u32 & (uint32_t) 0x3 ) + (uint32_t) 1;
}
607
/*
 * size of the DIMM as encoded in SPD byte 31
 *
 * The byte is rotated left by 3 bits, so that bit 5 of the SPD byte
 * becomes bit 0; the lowest bit n set in the rotated value selects
 * the result 0x80 << n.
 *
 * returns 0x80 << n, or 0 if no bit is set in SPD byte 31 at all
 * (the search is bounded to the 8 bits of the byte, an all-zero byte
 *  must neither loop forever nor shift beyond the type width)
 */
static uint32_t
ddr2_get_dimm_size( uint8_t *f_spd_pu08 )
{
        static const int SIZE_IDX   = (int) 31;
        uint8_t          l_smsk_u08;
        uint32_t         i;

        l_smsk_u08 = (uint8_t) ( ( f_spd_pu08[SIZE_IDX] << 3 ) |
                                 ( f_spd_pu08[SIZE_IDX] >> 5 ) );

        for( i = 0; i < 8; i++ ) {

                if( ( l_smsk_u08 & ( (uint32_t) 0x1 << i ) ) != 0 ) {
                        return (uint32_t) 0x80 << i;
                }

        }

        // no size bit set
        return 0;
}
622
623 static uint32_t
624 ddr2_get_dimm_type( uint8_t *f_spd_pu08 )
625 {
626         static const int TYPE_IDX = (int) 20;
627
628         return (uint32_t) f_spd_pu08[TYPE_IDX] & DIMM_TYPE_MSK;
629 }
630
631 static uint32_t
632 ddr2_get_dimm_org( uint8_t *f_spd_pu08, uint32_t /*out*/ *f_omsk_pu32 )
633 {
634         static const int ORG_IDX   = (int) 13;
635         uint32_t         l_ret_u32 = (uint32_t) f_spd_pu08[ORG_IDX];
636
637         if( l_ret_u32 == 4 ) {
638                 *f_omsk_pu32  = DIMM_ORG_x4;
639         } else if( l_ret_u32 == 8 ) {
640                 *f_omsk_pu32  = DIMM_ORG_x8;
641                 *f_omsk_pu32 |= DIMM_ORG_MIXx8x16;
642         } else if( l_ret_u32 == 16 ) {
643                 *f_omsk_pu32  = DIMM_ORG_x16;
644                 *f_omsk_pu32 |= DIMM_ORG_MIXx8x16;
645         } else {
646                 *f_omsk_pu32  = DIMM_ORG_UNKNOWN;
647                  l_ret_u32    = (uint32_t) ~0;
648         }
649
650         return l_ret_u32;
651 }
652
/*
 * data width of the DIMM: SPD byte 6, unmodified
 */
static uint32_t
ddr2_get_dimm_width( uint8_t *f_spd_pu08 )
{
        const uint8_t l_byte_u08 = f_spd_pu08[6];

        return (uint32_t) l_byte_u08;
}
660
/*
 * ecc indicator: 1 if BIT(1) of SPD byte 11 is set, 0 if not
 */
static uint32_t
ddr2_get_dimm_ecc( uint8_t *f_spd_pu08 )
{
        if( ( f_spd_pu08[11] & BIT(1) ) == 0 ) {
                return 0;
        }

        return 1;
}
668
/*
 * supported burst lengths of the DIMM: SPD byte 16, unmodified
 */
static uint32_t
ddr2_get_dimm_burstlen( uint8_t *f_spd_pu08 )
{
        const uint8_t l_byte_u08 = f_spd_pu08[16];

        return (uint32_t) l_byte_u08;
}
676
677 static void
678 ddr2_get_dimm_speed( dimm_t *f_dimm, uint8_t *f_spd_pu08 )
679 {
680         static const int      SPEED_IDX[] = { 25, 23, 9 };
681         static const uint32_t NS[]        = { 25, 33, 66, 75 };
682         uint8_t               l_tmp_u08;
683         uint32_t              l_dspeed_u32;
684         uint32_t              idx = 0;
685         uint32_t              i;
686
687         for( i = NUM_CL - f_dimm->m_clcnt_u32; i < NUM_CL; i++ ) {
688                 l_tmp_u08     = f_spd_pu08[SPEED_IDX[i]];
689                 l_dspeed_u32  = (uint32_t) ( l_tmp_u08 >> 4 ) * 100;
690                 l_tmp_u08    &= (uint8_t) 0xf;
691
692                 if( l_tmp_u08 >= (uint8_t) 10 ) {
693                         l_dspeed_u32 += NS[l_tmp_u08 - 10];
694                 } else {
695                         l_dspeed_u32 += (uint32_t) l_tmp_u08 * 10;
696                 }
697
698                 f_dimm->m_tCK_pu32[idx]    = l_dspeed_u32;
699                 f_dimm->m_speed_pu32[idx]  = (uint32_t) 2000000 / l_dspeed_u32;
700                 f_dimm->m_speed_pu32[idx] += (uint32_t) 5;
701                 f_dimm->m_speed_pu32[idx] /= (uint32_t) 10;
702                 idx++;
703         }
704
705 }
706
707 static void
708 ddr2_get_dimm_timings( dimm_t *f_dimm, uint8_t *f_spd_pu08 )
709 {
710         static const uint32_t NS[]  = { 00, 25, 33, 50, 66, 75, 00, 00 };
711         static const uint32_t USMUL = (uint32_t) 390625;
712         static const int tREF_IDX   = (int) 12;
713         static const int tRP_IDX    = (int) 27;
714         static const int tRRD_IDX   = (int) 28;
715         static const int tRCD_IDX   = (int) 29;
716         static const int tRAS_IDX   = (int) 30;
717         static const int tWR_IDX    = (int) 36;
718         static const int tWTR_IDX   = (int) 37;
719         static const int tRTP_IDX   = (int) 38;
720         static const int tRC_IDX    = (int) 41; // & 40
721         static const int tRFC_IDX   = (int) 42; // & 40
722
723         uint32_t         l_tmp_u32;
724
725         f_dimm->m_tRP_u32  = (uint32_t) f_spd_pu08[tRP_IDX]  *  25;
726         f_dimm->m_tRRD_u32 = (uint32_t) f_spd_pu08[tRRD_IDX] *  25;
727         f_dimm->m_tRCD_u32 = (uint32_t) f_spd_pu08[tRCD_IDX] *  25;
728         f_dimm->m_tWR_u32  = (uint32_t) f_spd_pu08[tWR_IDX]  *  25;
729         f_dimm->m_tWTR_u32 = (uint32_t) f_spd_pu08[tWTR_IDX] *  25;
730         f_dimm->m_tRTP_u32 = (uint32_t) f_spd_pu08[tRTP_IDX] *  25;
731         f_dimm->m_tRAS_u32 = (uint32_t) f_spd_pu08[tRAS_IDX] * 100;
732
733         l_tmp_u32          = (uint32_t) ( f_spd_pu08[tRC_IDX - 1] >> 4 );
734         l_tmp_u32         &= (uint32_t) 0x7;
735         f_dimm->m_tRC_u32  = (uint32_t) f_spd_pu08[tRC_IDX] * 100 +
736                                         NS[l_tmp_u32];
737
738         l_tmp_u32           = (uint32_t) f_spd_pu08[tRFC_IDX - 2];
739         l_tmp_u32          &= (uint32_t) 0xf;
740         f_dimm->m_tRFC_u32  = (uint32_t) 256 * ( l_tmp_u32 & (uint32_t) 0x1 );
741         f_dimm->m_tRFC_u32 += (uint32_t) f_spd_pu08[tRFC_IDX];
742         f_dimm->m_tRFC_u32 *= 100;
743         l_tmp_u32         >>= 1;
744         f_dimm->m_tRFC_u32 += NS[l_tmp_u32];
745
746         l_tmp_u32           = (uint32_t) f_spd_pu08[tREF_IDX];
747         l_tmp_u32          &= (uint32_t) 0x7f;
748
749         if( l_tmp_u32 == 0 ) {
750                 l_tmp_u32 = (uint32_t) 2;
751         } else if( l_tmp_u32 <= (uint32_t) 2 ) {
752                 l_tmp_u32--;
753         }
754
755         f_dimm->m_tREF_u32 = ( l_tmp_u32 + 1 ) * USMUL;
756 }
757
/*
 * number of banks of the DIMM: SPD byte 17, unmodified
 */
static uint32_t
ddr2_get_banks( uint8_t *f_spd_pu08 )
{
        const uint8_t l_byte_u08 = f_spd_pu08[17];

        return (uint32_t) l_byte_u08;
}
765
/*
 * mask of the supported CAS latencies: SPD byte 18, unmodified
 */
static uint32_t
ddr2_get_cl_mask( uint8_t *f_spd_pu08 )
{
        const uint8_t l_byte_u08 = f_spd_pu08[18];

        return (uint32_t) l_byte_u08;
}
773
774 static void
775 ddr2_get_cl( dimm_t *f_dimm )
776 {
777         uint32_t l_clcnt_u32 = 0;
778         uint32_t i;
779
780         for( i = 0; ( i < 8 ) && ( l_clcnt_u32 < NUM_CL ) ; i++ ) {
781
782                 if( ( f_dimm->m_clmsk_u32 & ( (uint32_t) 0x1 << i ) ) != 0 ) {
783                         f_dimm->m_clval_pu32[l_clcnt_u32] = i;
784                         l_clcnt_u32++;
785                 }
786
787         }
788
789         f_dimm->m_clcnt_u32 = l_clcnt_u32;
790 }
791
792 static uint32_t
793 ddr2_cl2speed( dimm_t *f_dimm, uint32_t f_cl_u32, uint32_t *f_tCK_pu32 )
794 {
795         uint32_t i;
796
797         for(i = 0; (i < NUM_CL) && (f_dimm->m_clval_pu32[i] != f_cl_u32); i++);
798
799         if( i == NUM_CL ) {
800                 return (uint32_t) ~0;
801         }
802
803         *f_tCK_pu32 = f_dimm->m_tCK_pu32[i];
804
805         return f_dimm->m_speed_pu32[i];
806 }
807
808 static void
809 ddr2_setupDIMM( dimm_t *f_dimm, uint32_t f_bank_u32, uint8_t *f_spd_pu08 )
810 {
811         f_dimm->m_pop_u32     = SL_POP;
812         f_dimm->m_bank_u32    = f_bank_u32;
813         f_dimm->m_size_u32    = ddr2_get_dimm_size( f_spd_pu08 );
814         f_dimm->m_rank_u32    = ddr2_get_dimm_rank( f_spd_pu08 );
815         f_dimm->m_type_u32    = ddr2_get_dimm_type( f_spd_pu08 );
816         f_dimm->m_orgval_u32  = ddr2_get_dimm_org( f_spd_pu08, &f_dimm->m_orgmsk_u32 );
817         f_dimm->m_width_u32   = ddr2_get_dimm_width( f_spd_pu08 );
818         f_dimm->m_ecc_u32     = ddr2_get_dimm_ecc( f_spd_pu08 );
819         f_dimm->m_burst_u32   = ddr2_get_dimm_burstlen( f_spd_pu08 );
820         f_dimm->m_clmsk_u32   = ddr2_get_cl_mask( f_spd_pu08 );
821         f_dimm->m_bankcnt_u32 = ddr2_get_banks( f_spd_pu08 );
822
823         ddr2_get_cl( f_dimm );
824         ddr2_get_dimm_speed( f_dimm, f_spd_pu08 );
825         ddr2_get_dimm_timings( f_dimm, f_spd_pu08 );
826 }
827
828 static int32_t
829 ddr2_checkSPD( uint8_t *f_spd_pu08 )
830 {
831         uint8_t  crc = 0;
832         uint32_t i;
833
834         for( i = 0; i < SPD_BUF_SIZE - 1; i++ ) {
835                 crc += f_spd_pu08[i];
836         }
837
838         if( crc != f_spd_pu08[i] ) {
839                 return RET_ERR;
840         }
841
842         return RET_OK;
843 }
844
845 static int32_t
846 ddr2_readSPDs( void )
847 {
848         static const uint32_t MAX_SPD_FAIL = 3;
849         uint8_t  l_spdbuf_pu08[SPD_BUF_SIZE];
850         uint32_t l_bankfail_u32 = 0;
851         uint32_t l_spdfail_u32  = 0;
852         int32_t  l_i2c_i32      = RET_OK;
853         int32_t  l_spd_i32      = RET_OK;
854         int32_t  ret            = RET_OK;
855         uint32_t i;
856
857         /*
858          * read spd's and detect populated slots
859          */
860         for( i = 0; i < NUM_SLOTS; i++ ) {
861                 /*
862                  * indicate slot as empty
863                  */
864                 m_dimm[i].m_pop_u32 = 0;
865
866                 /*
867                  * check whether bank is switched off
868                  */
869                 if( ( m_bankoff_u32 & ( 0x1 << ( i / 2 ) ) ) != 0 ) {
870                         continue;
871                 }
872
873                 /*
874                  * read SPD data
875                  */
876
877                 /*
878                  * reset SPD fail counter
879                  */
880                 l_spdfail_u32 = MAX_SPD_FAIL;
881                 l_spd_i32     = RET_OK;
882
883                 while( l_spdfail_u32 != 0 ) {
884                         l_i2c_i32 = i2c_read( I2C_START + i, 0x0, l_spdbuf_pu08, SPD_BUF_SIZE );
885
886                         if( l_i2c_i32 == RET_OK ) {
887                                 l_spd_i32 = ddr2_checkSPD( l_spdbuf_pu08 );
888
889                                 if( l_spd_i32 == RET_OK ) {
890                                         l_spdfail_u32 = 0;
891                                 } else {
892                                         l_spdfail_u32--;
893                                 }
894
895                         } else {
896                                 l_spdfail_u32--;
897                         }
898
899                 }
900
901                 if( l_spd_i32 != RET_OK ) {
902                         #ifdef U4_INFO
903                         printf( "\r\n  [ERROR -> SPD read failure in slot %u]",
904                                 i2c_get_slot( I2C_START + i ) );
905                         #endif
906
907                         l_bankfail_u32 |= ( 0x1 << ( i / 2 ) );
908                         ret             = RET_ERR;
909                 } else if( l_i2c_i32 == RET_OK ) {
910                         /*
911                          * slot is populated
912                          */
913                         ddr2_setupDIMM( &m_dimm[i], i / 2, l_spdbuf_pu08 );
914
915                         m_dptr[m_dcnt_u32] = &m_dimm[i];
916                         m_dcnt_u32++;
917                 }
918
919         }
920
921         if( ret != RET_OK ) {
922                 m_bankoff_u32 |= l_bankfail_u32;
923                 #ifdef U4_INFO
924                 printf( "\r\n" );
925                 #endif
926         }
927
928         return ret;
929 }
930
931 static int32_t
932 ddr2_setupDIMMcfg( void )
933 {
934         uint32_t  l_tmp_u32;
935         uint32_t  l_tmp0_u32;
936         uint32_t  l_tmp1_u32;
937         uint32_t  i, j, e, b;
938
939         /*
940          * check wether on board DIMM slot population is valid
941          */
942         e = 0;
943         b = 0;
944         for( i = 0; i < NUM_SLOTS; i += 2 ) {
945
946                 switch( m_dimm[i].m_pop_u32 + m_dimm[i+1].m_pop_u32 ) {
947                         case 0: {
948                                 m_bankpop_u32[i/2] = 0;
949                                 break;
950                         }
951
952                         case 2 * SL_POP: {
953                                 m_bankpop_u32[i/2] = !0;
954                                 b++;
955                                 break;
956                         }
957
958                         default: {
959                                 #ifdef U4_DEBUG
960                                 printf( "\r\n  [ERROR -> only 1 DIMM installed in bank %u]", i/2 );
961                                 #endif
962                                 e++;
963                         }
964
965                 }
966
967         }
968
969         /*
970          * return on error
971          */
972         if( e != 0 ) {
973                 #ifdef U4_DEBUG
974                 printf( "\r\n" );
975                 #endif
976                 return RET_ERR;
977         }
978
979         if( b == 0 ) {
980                 #ifdef U4_DEBUG
981                 printf( "\r\n  [ERROR -> no (functional) memory installed]\r\n" );
982                 #endif
983                 return RET_ERR;
984         }
985
986         /*
987          * check DIMM compatibility
988          * configuration is 128 bit data/128 bit bus
989          * -all DIMMs must be organized as x4
990          * -all DIMMs must be 72 bit wide with ECC
991          * -all DIMMs must be registered DIMMs (RDIMMs)
992          * -paired DIMMs must have the same # of ranks, size & organization
993          */
994
995         /*
996          * check DIMM ranks & sizes
997          */
998         e = 0;
999         for( i = 0; i < NUM_SLOTS; i += 2 ) {
1000
1001                 if( (   m_bankpop_u32[i/2]   != 0                      ) &&
1002                     ( ( m_dimm[i].m_rank_u32 != m_dimm[i+1].m_rank_u32 ) ||
1003                       ( m_dimm[i].m_size_u32 != m_dimm[i+1].m_size_u32 ) ) ) {
1004                         #ifdef U4_DEBUG
1005                         printf( "\r\n  [ERROR -> installed DIMMs in bank %u have different ranks/sizes]", i/2 );
1006                         #endif
1007                         e++;
1008                 }
1009
1010         }
1011
1012         /*
1013          * return on error
1014          */
1015         if( e != 0 ) {
1016                 #ifdef U4_DEBUG
1017                 printf( "\r\n" );
1018                 #endif
1019                 return RET_ERR;
1020         }
1021
1022         /*
1023          * check valid DIMM organisation (must be x4)
1024          */
1025         e = 0;
1026         for( i = 0; i < m_dcnt_u32; i++ ) {
1027
1028                 if( ( m_dptr[i]->m_orgmsk_u32 & DIMM_ORG_x4 ) == 0 ) {
1029                         #ifdef U4_DEBUG
1030                         printf( "\r\n  [ERROR -> wrong DIMM organisation in bank %u]",
1031                                 m_dptr[i]->m_bank_u32 );
1032                         #endif
1033                         e++;
1034                 }
1035
1036         }
1037
1038         /*
1039          * return on error
1040          */
1041         if( e != 0 ) {
1042                 #ifdef U4_DEBUG
1043                 printf( "\r\n" );
1044                 #endif
1045                 return RET_ERR;
1046         }
1047
1048         e = (uint32_t) ~0;
1049         for( i = 0; i < m_dcnt_u32; i++ ) {
1050                 e &= m_dptr[i]->m_type_u32;
1051         }
1052
1053         /*
1054          * return on error
1055          */
1056         if( e == 0 ) {
1057                 #ifdef U4_DEBUG
1058                 printf( "\r\n  [ERROR -> installed DIMMs are of different type]\r\n" );
1059                 #endif
1060                 return RET_ERR;
1061         }
1062
1063         /*
1064          * setup generic dimm
1065          */
1066         m_gendimm.m_type_u32 = e;
1067
1068         /*
1069          * check valid width, ecc & burst length
1070          */
1071         e = 0;
1072         for( i = 0; i < m_dcnt_u32; i++ ) {
1073
1074                 if( m_dptr[i]->m_width_u32 != DIMM_WIDTH ) {
1075                         #ifdef U4_DEBUG
1076                         printf( "\r\n  [ERROR -> invalid DIMM width in bank %u]",
1077                                 m_dptr[i]->m_bank_u32 );
1078                         #endif
1079                         e++;
1080                 }
1081
1082                 if( m_dptr[i]->m_ecc_u32 == 0 ) {
1083                         #ifdef U4_DEBUG
1084                         printf( "\r\n  [ERROR -> DIMM(s) do not support ECC in bank %u]",
1085                                 m_dptr[i]->m_bank_u32 );
1086                         #endif
1087                         e++;
1088                 }
1089
1090                 if( ( m_dptr[i]->m_burst_u32 & DIMM_BURSTLEN_4 ) == 0 ) {
1091                         #ifdef U4_DEBUG
1092                         printf( "\r\n  [ERROR -> DIMM(s) have invalid burst length in bank %u]",
1093                                 m_dptr[i]->m_bank_u32 );
1094                         #endif
1095                         e++;
1096                 }
1097
1098         }
1099
1100         /*
1101          * return on error
1102          */
1103         if( e != 0 ) {
1104                 #ifdef U4_DEBUG
1105                 printf( "\r\n" );
1106                 #endif
1107                 return RET_ERR;
1108         }
1109
1110         /*
1111          * setup generic dimm
1112          */
1113         m_gendimm.m_width_u32 = m_dptr[0]->m_width_u32;
1114         m_gendimm.m_ecc_u32   = m_dptr[0]->m_ecc_u32;
1115         m_gendimm.m_burst_u32 = m_dptr[0]->m_burst_u32;
1116
1117         /*
1118          * success
1119          */
1120         m_gendimm.m_pop_u32 = SL_POP;
1121
1122         /*
1123          * setup timing parameters
1124          */
1125
1126         /*
1127          * find smallest common CL value
1128          */
1129         l_tmp_u32 = (uint32_t) ~0;
1130         for( i = 0; i < m_dcnt_u32; i++ ) {
1131                 l_tmp_u32 &= m_dptr[i]->m_clmsk_u32;
1132         }
1133
1134         m_gendimm.m_clmsk_u32 = l_tmp_u32;
1135         ddr2_get_cl( &m_gendimm );
1136
1137         /*
1138          * find fastest common DIMM speed for all common CL values
1139          */
1140         for( i = 0; i < m_gendimm.m_clcnt_u32; i++ ) {
1141                 m_gendimm.m_speed_pu32[i] = (uint32_t) ~0;
1142
1143                 for( j = 0; j < m_dcnt_u32; j++ ) {
1144                         l_tmp0_u32 =
1145                         ddr2_cl2speed( m_dptr[j],
1146                                        m_gendimm.m_clval_pu32[i],
1147                                        &l_tmp1_u32 );
1148
1149                         if( m_gendimm.m_speed_pu32[i] > l_tmp0_u32 ) {
1150                                 m_gendimm.m_speed_pu32[i] = l_tmp0_u32;
1151                                 m_gendimm.m_tCK_pu32[i]   = l_tmp1_u32;
1152                         }
1153
1154                 }
1155
1156         }
1157
1158         /*
1159          * check wether cl values are supported by U4
1160          */
1161         for( i = 0; i < m_gendimm.m_clcnt_u32; i++ ) {
1162
1163                 if( ( m_gendimm.m_clval_pu32[i] >= U4_MIN_CL ) &&
1164                     ( m_gendimm.m_clval_pu32[i] <= U4_MAX_CL ) ) {
1165                         break;
1166                 }
1167
1168         }
1169
1170         if( i == m_gendimm.m_clcnt_u32 ) {
1171                 #ifdef U4_DEBUG
1172                 printf( "\r\n  [ERROR -> DIMM's CL values not supported]\r\n" );
1173                 #endif
1174                 return RET_ERR;
1175         }
1176
1177         /*
1178          * choose cl/speed values to use: prefer speed over CL
1179          * i holds smallest supported cl value of u4 already
1180          */
1181         l_tmp_u32 = 0;
1182         while( i < m_gendimm.m_clcnt_u32 ) {
1183
1184                 if( l_tmp_u32 < m_gendimm.m_speed_pu32[i] ) {
1185                         l_tmp_u32    = m_gendimm.m_speed_pu32[i];
1186                         m_dclidx_u32 = i;
1187                 }
1188
1189                 i++;
1190         }
1191
1192         /*
1193          * choose largest number of banks
1194          */
1195         m_gendimm.m_bankcnt_u32 = 0;
1196
1197         for( i = 0; i < m_dcnt_u32; i++ ) {
1198
1199                 if( m_gendimm.m_bankcnt_u32 < m_dptr[i]->m_bankcnt_u32 ) {
1200                         m_gendimm.m_bankcnt_u32 = m_dptr[i]->m_bankcnt_u32;
1201                 }
1202
1203         }
1204
1205         /*
1206          * setup fastest possible timing parameters for all DIMMs
1207          */
1208         m_gendimm.m_tRP_u32  = 0;
1209         m_gendimm.m_tRRD_u32 = 0;
1210         m_gendimm.m_tRCD_u32 = 0;
1211         m_gendimm.m_tWR_u32  = 0;
1212         m_gendimm.m_tWTR_u32 = 0;
1213         m_gendimm.m_tRTP_u32 = 0;
1214         m_gendimm.m_tRAS_u32 = 0;
1215         m_gendimm.m_tRC_u32  = 0;
1216         m_gendimm.m_tRFC_u32 = 0;
1217         m_gendimm.m_tREF_u32 = (uint32_t) ~0;
1218
1219         for( i = 0; i < m_dcnt_u32; i++ ) {
1220
1221                 if( m_gendimm.m_tRP_u32  < m_dptr[i]->m_tRP_u32  ) {
1222                         m_gendimm.m_tRP_u32  = m_dptr[i]->m_tRP_u32;
1223                 }
1224
1225                 if( m_gendimm.m_tRRD_u32 < m_dptr[i]->m_tRRD_u32 ) {
1226                         m_gendimm.m_tRRD_u32 = m_dptr[i]->m_tRRD_u32;
1227                 }
1228
1229                 if( m_gendimm.m_tRCD_u32 < m_dptr[i]->m_tRCD_u32 ) {
1230                         m_gendimm.m_tRCD_u32 = m_dptr[i]->m_tRCD_u32;
1231                 }
1232
1233                 if( m_gendimm.m_tWR_u32  < m_dptr[i]->m_tWR_u32  ) {
1234                         m_gendimm.m_tWR_u32  = m_dptr[i]->m_tWR_u32;
1235                 }
1236
1237                 if( m_gendimm.m_tWTR_u32 < m_dptr[i]->m_tWTR_u32 ) {
1238                         m_gendimm.m_tWTR_u32 = m_dptr[i]->m_tWTR_u32;
1239                 }
1240
1241                 if( m_gendimm.m_tRTP_u32 < m_dptr[i]->m_tRTP_u32 ) {
1242                         m_gendimm.m_tRTP_u32 = m_dptr[i]->m_tRTP_u32;
1243                 }
1244
1245                 if( m_gendimm.m_tRAS_u32 < m_dptr[i]->m_tRAS_u32 ) {
1246                         m_gendimm.m_tRAS_u32 = m_dptr[i]->m_tRAS_u32;
1247                 }
1248
1249                 if( m_gendimm.m_tRC_u32  < m_dptr[i]->m_tRC_u32  ) {
1250                         m_gendimm.m_tRC_u32  = m_dptr[i]->m_tRC_u32;
1251                 }
1252
1253                 if( m_gendimm.m_tRFC_u32 < m_dptr[i]->m_tRFC_u32 ) {
1254                         m_gendimm.m_tRFC_u32 = m_dptr[i]->m_tRFC_u32;
1255                 }
1256
1257                 if( m_gendimm.m_tREF_u32 > m_dptr[i]->m_tREF_u32 ) {
1258                         m_gendimm.m_tREF_u32 = m_dptr[i]->m_tREF_u32;
1259                 }
1260
1261         }
1262
1263         return RET_OK;
1264 }
1265
1266 static void
1267 u4_group2dimmsDS( dimm_t *f_dimm0, dimm_t *f_dimm1 )
1268 {
1269         dgroup_t *l_dgr = &m_dgroup[m_dgrcnt_u32];
1270
1271         /*
1272          * known conditions at this point:
1273          * -at least 2 slots are populated
1274          * -the 2 DIMMs are equal
1275          * -DIMMs are double sided (2 ranks)
1276          *
1277          * RESULT:
1278          * 1 group of 2 ranks (2 ranks/2 DIMMs)
1279          * -> CS mode 1 (one double sided DIMM pair)
1280          */
1281         l_dgr->m_size_u32   = 2 * ( f_dimm0->m_size_u32 * f_dimm0->m_rank_u32 );
1282         l_dgr->m_ss_u32     = 0;
1283         l_dgr->m_csmode_u32 = 1;
1284         l_dgr->m_dcnt_u32   = 2;
1285         l_dgr->m_dptr[0]    = f_dimm0;
1286         l_dgr->m_dptr[1]    = f_dimm1;
1287
1288         m_dgrcnt_u32++;
1289 }
1290
1291 static void
1292 u4_group2dimmsSS( dimm_t *f_dimm0, dimm_t *f_dimm1 )
1293 {
1294         dgroup_t *l_dgr = &m_dgroup[m_dgrcnt_u32];
1295
1296         /*
1297          * known conditions at this point:
1298          * -at least 2 slots are populated
1299          * -the 2 DIMMs are equal
1300          * -DIMMs are single sided (1 rank)
1301          *
1302          * RESULT:
1303          * 1 group of 1 rank (1 rank/2 DIMMs)
1304          * -> CS mode 0 (one single sided DIMM pair)
1305          */
1306         l_dgr->m_size_u32   = 2 * ( f_dimm0->m_size_u32 * f_dimm0->m_rank_u32 );
1307         l_dgr->m_ss_u32     = 1;
1308         l_dgr->m_csmode_u32 = 0;
1309         l_dgr->m_dcnt_u32   = 2;
1310         l_dgr->m_dptr[0]    = f_dimm0;
1311         l_dgr->m_dptr[1]    = f_dimm1;
1312
1313         m_dgrcnt_u32++;
1314 }
1315
1316 static void
1317 u4_group4dimmsDS( dimm_t *f_dimm0, dimm_t *f_dimm1,
1318                   dimm_t *f_dimm2, dimm_t *f_dimm3 )
1319 {
1320         dgroup_t *l_dgr = &m_dgroup[m_dgrcnt_u32];
1321
1322         /*
1323          * known conditions at this point:
1324          * -4 slots are populated
1325          * -all 4 DIMMs are equal
1326          * -DIMMs are double sided (2 ranks)
1327          *
1328          * RESULT:
1329          * 1 group of 4 ranks (2 ranks/2 DIMMs)
1330          * -> CS mode 2 (two double sided DIMM pairs)
1331          */
1332         l_dgr->m_size_u32   = 4 * ( f_dimm0->m_size_u32 * f_dimm0->m_rank_u32 );
1333         l_dgr->m_ss_u32     = 0;
1334         l_dgr->m_csmode_u32 = 2;
1335         l_dgr->m_dcnt_u32   = 4;
1336         l_dgr->m_dptr[0]    = f_dimm0;
1337         l_dgr->m_dptr[1]    = f_dimm1;
1338         l_dgr->m_dptr[2]    = f_dimm2;
1339         l_dgr->m_dptr[3]    = f_dimm3;
1340
1341         m_dgrcnt_u32++;
1342 }
1343
1344 static void
1345 u4_group4dimmsSS( dimm_t *f_dimm0, dimm_t *f_dimm1,
1346                   dimm_t *f_dimm2, dimm_t *f_dimm3 )
1347 {
1348         dgroup_t *l_dgr = &m_dgroup[m_dgrcnt_u32];
1349
1350         /*
1351          * known conditions at this point:
1352          * -4 slots are populated
1353          * -all 4 DIMMs are equal
1354          * -DIMMs are single sided (1 rank)
1355          *
1356          * RESULT:
1357          * 1 group of 2 ranks (1 rank/2 DIMMs)
1358          * -> CS mode 1 (two single sided DIMM pairs)
1359          */
1360         l_dgr->m_size_u32   = 4 * ( f_dimm0->m_size_u32 * f_dimm0->m_rank_u32 );
1361         l_dgr->m_ss_u32     = 1;
1362         l_dgr->m_csmode_u32 = 1;
1363         l_dgr->m_dcnt_u32   = 4;
1364         l_dgr->m_dptr[0]    = f_dimm0;
1365         l_dgr->m_dptr[1]    = f_dimm1;
1366         l_dgr->m_dptr[2]    = f_dimm2;
1367         l_dgr->m_dptr[3]    = f_dimm3;
1368
1369         m_dgrcnt_u32++;
1370 }
1371
1372 static void
1373 u4_group8dimmsDS( dimm_t *f_dimm0, dimm_t *f_dimm1,
1374                   dimm_t *f_dimm2, dimm_t *f_dimm3,
1375                   dimm_t *f_dimm4, dimm_t *f_dimm5,
1376                   dimm_t *f_dimm6, dimm_t *f_dimm7 )
1377 {
1378         dgroup_t *l_dgr = &m_dgroup[m_dgrcnt_u32];
1379
1380         /*
1381          * known conditions at this point:
1382          * -8 slots are populated
1383          * -all 8 DIMMs are equal
1384          * -DIMMs are double sided (2 ranks)
1385          *
1386          * RESULT:
1387          * 1 group of 8 ranks (2 ranks/2 DIMMs)
1388          * -> CS mode 3 (four double sided DIMM pairs)
1389          */
1390         l_dgr->m_size_u32   = 8 * ( f_dimm0->m_size_u32 * f_dimm0->m_rank_u32 );
1391         l_dgr->m_ss_u32     = 0;
1392         l_dgr->m_csmode_u32 = 3;
1393         l_dgr->m_dcnt_u32   = 8;
1394         l_dgr->m_dptr[0]    = f_dimm0;
1395         l_dgr->m_dptr[1]    = f_dimm1;
1396         l_dgr->m_dptr[2]    = f_dimm2;
1397         l_dgr->m_dptr[3]    = f_dimm3;
1398         l_dgr->m_dptr[4]    = f_dimm4;
1399         l_dgr->m_dptr[5]    = f_dimm5;
1400         l_dgr->m_dptr[6]    = f_dimm6;
1401         l_dgr->m_dptr[7]    = f_dimm7;
1402
1403         m_dgrcnt_u32++;
1404 }
1405
1406 static void
1407 u4_group8dimmsSS( dimm_t *f_dimm0, dimm_t *f_dimm1,
1408                   dimm_t *f_dimm2, dimm_t *f_dimm3,
1409                   dimm_t *f_dimm4, dimm_t *f_dimm5,
1410                   dimm_t *f_dimm6, dimm_t *f_dimm7 )
1411 {
1412         dgroup_t *l_dgr = &m_dgroup[m_dgrcnt_u32];
1413
1414         /*
1415          * known conditions at this point:
1416          * -8 slots are populated
1417          * -all 8 DIMMs are equal
1418          * -DIMMs are single sided (1 rank)
1419          *
1420          * RESULT:
1421          * 1 group of 4 ranks (1 rank/2 DIMMs)
1422          * -> CS mode 2 (four single sided DIMM pairs)
1423          */
1424         l_dgr->m_size_u32   = 8 * ( f_dimm0->m_size_u32 * f_dimm0->m_rank_u32 );
1425         l_dgr->m_ss_u32     = 1;
1426         l_dgr->m_csmode_u32 = 2;
1427         l_dgr->m_dcnt_u32   = 8;
1428         l_dgr->m_dptr[0]    = f_dimm0;
1429         l_dgr->m_dptr[1]    = f_dimm1;
1430         l_dgr->m_dptr[2]    = f_dimm2;
1431         l_dgr->m_dptr[3]    = f_dimm3;
1432         l_dgr->m_dptr[4]    = f_dimm4;
1433         l_dgr->m_dptr[5]    = f_dimm5;
1434         l_dgr->m_dptr[6]    = f_dimm6;
1435         l_dgr->m_dptr[7]    = f_dimm7;
1436
1437         m_dgrcnt_u32++;
1438 }
1439
1440 static int32_t
1441 u4_Dcmp( dimm_t *f_dimm0, dimm_t *f_dimm1 )
1442 {
1443
1444         if( ( f_dimm0->m_size_u32 == f_dimm1->m_size_u32 ) &&
1445             ( f_dimm0->m_rank_u32 == f_dimm1->m_rank_u32 ) ) {
1446                 return RET_OK;
1447         }
1448
1449         return RET_ERR;
1450 }
1451
1452 static void
1453 u4_group1banks( uint32_t *bidx )
1454 {
1455         uint32_t didx = 2 * bidx[0];
1456
1457         /*
1458          * known conditions at this point:
1459          * -either DIMMs 0 & 4 or
1460          *         DIMMs 1 & 5 or
1461          *         DIMMs 2 & 6 or
1462          *         DIMMs 3 & 7 are populated
1463          * -3 (bimini)/1 (maui) pair of slots is empty
1464          * -installed DIMMs are equal
1465          */
1466
1467         /*
1468          * double/single sided setup
1469          */
1470         if( m_dimm[didx].m_rank_u32 == 1 ) {
1471                 u4_group2dimmsSS( &m_dimm[didx], &m_dimm[didx+1] );
1472         } else {
1473                 u4_group2dimmsDS( &m_dimm[didx], &m_dimm[didx+1] );
1474         }
1475
1476 }
1477
1478 static void
1479 u4_group2banks( uint32_t *bidx )
1480 {
1481         uint32_t didx0 = 2 * bidx[0];
1482         uint32_t didx1 = 2 * bidx[1];
1483
1484         /*
1485          * known conditions at this point:
1486          * -4 slots are populated
1487          */
1488
1489         /*
1490          * check wether DIMM banks may be grouped
1491          */
1492         if( ( ( ( bidx[0] + bidx[1] ) & 0x1 )           != 0 ) &&
1493             ( u4_Dcmp( &m_dimm[didx0], &m_dimm[didx1] ) == 0 ) ) {
1494                 /*
1495                  * double/single sided setup
1496                  * NOTE: at this point all DIMMs have the same amount
1497                  * of ranks, therefore only the # of ranks on DIMM 0 is checked
1498                  */
1499                 if( m_dimm[didx0].m_rank_u32 == 1 ) {
1500                         u4_group4dimmsSS( &m_dimm[didx0], &m_dimm[didx0+1],
1501                                           &m_dimm[didx1], &m_dimm[didx1+1]);
1502                 } else {
1503                         u4_group4dimmsDS( &m_dimm[didx0], &m_dimm[didx0+1],
1504                                           &m_dimm[didx1], &m_dimm[didx1+1]);
1505                 }
1506
1507         } else {
1508                 u4_group1banks( &bidx[0] );
1509                 u4_group1banks( &bidx[1] );
1510         }
1511
1512 }
1513
/*
 * u4_group3banks
 *
 * Creates the DIMM groups for three populated banks bidx[0..2]:
 * two of the banks are handed to u4_group2banks(), the remaining one
 * to u4_group1banks().
 * -banks 0 & 1 are paired if both are present (bidx[2] stays single)
 * -otherwise banks 2 & 3 are paired if both are present (bidx[0] stays
 *  single)
 * -nothing is done if neither pair is found
 */
static void
u4_group3banks( uint32_t *bidx )
{
        uint32_t *l_pair;
        uint32_t *l_single;

        if( ( bidx[0] == 0 ) && ( bidx[1] == 1 ) ) {
                l_pair   = &bidx[0];
                l_single = &bidx[2];
        } else if( ( bidx[1] == 2 ) && ( bidx[2] == 3 ) ) {
                l_pair   = &bidx[1];
                l_single = &bidx[0];
        } else {
                return;
        }

        u4_group2banks( l_pair );
        u4_group1banks( l_single );
}
1527
1528 static void
1529 u4_group4banks( uint32_t *bidx )
1530 {
1531         uint32_t didx0 = 2 * bidx[0];
1532         uint32_t didx1 = 2 * bidx[1];
1533         uint32_t didx2 = 2 * bidx[2];
1534         uint32_t didx3 = 2 * bidx[3];
1535
1536         if( ( u4_Dcmp( &m_dimm[didx0], &m_dimm[didx1] ) == RET_OK ) &&
1537             ( u4_Dcmp( &m_dimm[didx2], &m_dimm[didx3] ) == RET_OK ) &&
1538             ( u4_Dcmp( &m_dimm[didx0], &m_dimm[didx2] ) == RET_OK ) ) {
1539
1540                 if( m_dimm[didx0].m_rank_u32 == 1 ) {
1541                         u4_group8dimmsSS( &m_dimm[didx0], &m_dimm[didx0+1],
1542                                           &m_dimm[didx1], &m_dimm[didx1+1],
1543                                           &m_dimm[didx2], &m_dimm[didx2+1],
1544                                           &m_dimm[didx3], &m_dimm[didx3+1] );
1545                 } else {
1546                         u4_group8dimmsDS( &m_dimm[didx0], &m_dimm[didx0+1],
1547                                           &m_dimm[didx1], &m_dimm[didx1+1],
1548                                           &m_dimm[didx2], &m_dimm[didx2+1],
1549                                           &m_dimm[didx3], &m_dimm[didx3+1] );
1550                 }
1551
1552         } else {
1553                 u4_group2banks( &bidx[0] );
1554                 u4_group2banks( &bidx[2] );
1555         }
1556
1557 }
1558
1559 static void
1560 u4_sortDIMMgroups( void )
1561 {
1562         uint32_t i, j;
1563
1564         /*
1565          * setup global group pointers
1566          */
1567         for( i = 0; i < m_dgrcnt_u32; i++ ) {
1568                 m_dgrptr[i] = &m_dgroup[i];
1569         }
1570
1571         /*
1572          * use a simple bubble sort to sort groups by size (descending)
1573          */
1574         for( i = 0; i < ( m_dgrcnt_u32 - 1 ); i++ ) {
1575
1576                 for( j = i + 1; j < m_dgrcnt_u32; j++ ) {
1577
1578                         if( m_dgrptr[i]->m_size_u32 < m_dgrptr[j]->m_size_u32 ) {
1579                                 dgroup_t *l_sgr;
1580
1581                                 l_sgr       = m_dgrptr[i];
1582                                 m_dgrptr[i] = m_dgrptr[j];
1583                                 m_dgrptr[j] = l_sgr;
1584                         }
1585
1586                 }
1587
1588         }
1589
1590 }
1591
1592 static void
1593 u4_calcDIMMcnfg( void )
1594 {
1595         static const uint32_t _2GB  = (uint32_t) 0x00800;
1596         static const uint32_t _4GB  = (uint32_t) 0x01000;
1597         static const uint32_t _64GB = (uint32_t) 0x10000;
1598         uint32_t l_start_u32        = (uint32_t) 0;
1599         uint32_t l_end_u32          = (uint32_t) 0;
1600         uint32_t l_add2g_u32        = (uint32_t) 1;
1601         uint32_t l_sub2g_u32        = (uint32_t) 1;
1602         uint32_t i;
1603
1604         /*
1605          * setup DIMM group parameters
1606          */
1607         for( i = 0; i < m_dgrcnt_u32; i++ ) {
1608                 l_end_u32 = l_start_u32 + m_dgrptr[i]->m_size_u32;
1609
1610                 if( m_dgrptr[i]->m_size_u32 > _2GB ) {
1611
1612                         if( l_end_u32 < _64GB ) {
1613                                 l_add2g_u32 = ( l_end_u32 >> 11 );
1614                         } else {
1615                                 l_add2g_u32 = 1;
1616                         }
1617
1618                         if( l_start_u32 == 0 ) {
1619                                 l_sub2g_u32 = 1;
1620                         } else {
1621                                 l_sub2g_u32 = ( l_start_u32 >> 11 );
1622                         }
1623
1624                 } else if( l_add2g_u32 != 1 ) {
1625                         l_start_u32 += _2GB;
1626                         l_end_u32   += _2GB;
1627                         l_add2g_u32  = 1;
1628                         l_sub2g_u32  = 1;
1629                 }
1630
1631                 /*
1632                  * save values for the group
1633                  */
1634                 m_dgrptr[i]->m_start_u32 = ( l_start_u32 >> 7 ); // = /128
1635                 m_dgrptr[i]->m_end_u32   = ( l_end_u32   >> 7 );
1636                 m_dgrptr[i]->m_add2g_u32 = l_add2g_u32;
1637                 m_dgrptr[i]->m_sub2g_u32 = l_sub2g_u32;
1638
1639                 /*
1640                  * continue with next group
1641                  */
1642                 if( l_end_u32 != _2GB ) {
1643                         l_start_u32 = l_end_u32;
1644                 } else {
1645                         l_start_u32 = _4GB;
1646                 }
1647
1648         }
1649
1650 }
1651
1652 static int32_t
1653 u4_calcDIMMmemmode( void )
1654 {
1655         static const uint32_t MAX_ORG  = (uint32_t) 0x10;
1656         static const uint32_t MIN_BASE = (uint32_t) 0x80;
1657         static const uint32_t MAX_MODE = (uint32_t) 0x10;
1658         static const uint32_t MODE_ADD = (uint32_t) 0x04;
1659         dimm_t   *l_dptr;
1660         uint32_t  l_modeoffs_u32;
1661         uint32_t  l_sizebase_u32;
1662         int32_t   ret = RET_OK;
1663         uint32_t  i, j;
1664
1665         /*
1666          * loop through all DIMM groups and calculate memmode setting
1667          */
1668         for( i = 0; i < m_dgrcnt_u32; i++ ) {
1669                 l_dptr = m_dgrptr[i]->m_dptr[0]; // all dimms in one group are equal!
1670
1671                 l_modeoffs_u32  = MAX_ORG / l_dptr->m_orgval_u32;
1672                 l_modeoffs_u32 /= (uint32_t) 2;
1673                 l_sizebase_u32  = ( MIN_BASE << l_modeoffs_u32 );
1674
1675                 j = 0;
1676                 while( ( l_sizebase_u32 != l_dptr->m_size_u32 ) &&
1677                        ( j               < MAX_MODE           ) ) {
1678                         l_sizebase_u32 <<= 1;
1679                         j += (uint32_t) MODE_ADD;
1680                 }
1681
1682                 // return on error
1683                 if( j >= MAX_MODE ) {
1684                         #ifdef U4_INFO
1685                         uint32_t b, k, l;
1686                         printf( "\r\n  [ERROR -> unsupported memory type in bank(s)" );
1687
1688                         l = 0;
1689                         for( k = 0; k < m_dgrptr[i]->m_dcnt_u32; k++ ) {
1690                                 b = m_dgrptr[i]->m_dptr[k]->m_bank_u32;
1691
1692                                 if( ( l & ( 1 << b ) ) == 0 ) {
1693                                         printf( " %u", b );
1694                                         l |= ( 1 << b );
1695                                 }
1696
1697                         }
1698
1699                         printf( "]\r\n" );
1700                         #endif
1701
1702                         ret = RET_ERR;
1703                 } else {
1704                         m_dgrptr[i]->m_memmd_u32 = l_modeoffs_u32 + j;
1705                 }
1706
1707         }
1708
1709         return ret;
1710 }
1711
1712 static void
1713 u4_setupDIMMgroups( void )
1714 {
1715         static const uint64_t _1MB = (uint64_t) 0x100000;
1716         uint32_t l_bcnt_u32;
1717         uint32_t l_bidx_u32[NUM_BANKS];
1718         uint32_t i;
1719
1720         /*
1721          * calculate number of populated banks
1722          * IMPORTANT: array must be in ascending order!
1723          */
1724         l_bcnt_u32 = 0;
1725         for( i = 0; i < NUM_BANKS; i++ ) {
1726
1727                 if( m_bankpop_u32[i] != 0 ) {
1728                         l_bidx_u32[l_bcnt_u32] = i;
1729                         l_bcnt_u32++;
1730                 }
1731
1732         }
1733
1734         switch( l_bcnt_u32 ) {
1735                 case 4: u4_group4banks( &l_bidx_u32[0] ); break;
1736                 case 3: u4_group3banks( &l_bidx_u32[0] ); break;
1737                 case 2: u4_group2banks( &l_bidx_u32[0] ); break;
1738                 case 1: u4_group1banks( &l_bidx_u32[0] ); break;
1739         }
1740
1741         /*
1742          * sort DIMM groups by size (descending)
1743          */
1744         u4_sortDIMMgroups();
1745
1746         /*
1747          * calculate overall memory size in bytes
1748          * (group size is in MB)
1749          */
1750         m_memsize_u64 = 0;
1751         for( i = 0; i < m_dgrcnt_u32; i++ ) {
1752                 m_memsize_u64 += (uint64_t) m_dgrptr[i]->m_size_u32 * _1MB;
1753         }
1754
1755 }
1756
1757 static int32_t
1758 u4_setup_core_clock( void )
1759 {
1760         static const uint32_t MCLK = (uint32_t) 266;
1761         static const uint32_t CDIV = (uint32_t) 66;
1762         static const uint32_t CMAX = (uint32_t) 7;
1763         static const uint32_t MERR = (uint32_t) 10;
1764         uint32_t volatile     l_cclk_u32;
1765         uint32_t volatile     l_pll2_u32;
1766         uint32_t              i, s;
1767
1768         #ifdef U4_INFO
1769         printf( "  [core clock reset:          ]" );
1770         #endif
1771
1772         /*
1773          * calculate speed value
1774          */
1775         s  = m_gendimm.m_speed_pu32[m_dclidx_u32];
1776         s -= MCLK;
1777         s /= CDIV;
1778
1779         /*
1780          * insert new core clock value
1781          */
1782         l_cclk_u32  = load32_ci( ClkCntl_R );
1783         l_cclk_u32 &= ~CLK_DDR_CLK_MSK;
1784         l_cclk_u32 |= ( s << 18 );
1785
1786
1787         // return on error
1788         if( s > CMAX ) {
1789                 #ifdef U4_INFO
1790                 printf( "\b\b\b\bERR\r\n" );
1791                 #endif
1792                 return RET_ERR;
1793         }
1794
1795         /*
1796          * reset core clock
1797          */
1798         store32_ci( ClkCntl_R, l_cclk_u32 );
1799         dly( 0x1000000 );
1800         or32_ci( PLL2Cntl_R, IBIT(0) );
1801         dly( 0x1000000 );
1802
1803         /*
1804          * wait for reset to finish
1805          */
1806         do {
1807                 l_pll2_u32 = load32_ci( PLL2Cntl_R );
1808         } while( ( l_pll2_u32 & IBIT(0) ) != 0 );
1809
1810         /*
1811          * wait for stable PLL
1812          */
1813         s = 0;
1814         do {
1815                 l_pll2_u32  = ( load32_ci( PLL2Cntl_R ) & IBIT(2) );
1816
1817                 for( i = 0; i < 4; i++ ) {
1818                         l_pll2_u32 &= ( load32_ci( PLL2Cntl_R ) & IBIT(2) );
1819                         l_pll2_u32 &= ( load32_ci( PLL2Cntl_R ) & IBIT(2) );
1820                         l_pll2_u32 &= ( load32_ci( PLL2Cntl_R ) & IBIT(2) );
1821                         dly( 0x10000 );
1822                 }
1823
1824         } while( ( l_pll2_u32 == 0 ) && ( s++ < MERR ) );
1825
1826         if( s >= MERR ) {
1827                 #ifdef U4_INFO
1828                 printf( "\b\b\b\bERR\r\n" );
1829                 #endif
1830                 return RET_ERR;
1831         }
1832
1833         #ifdef U4_INFO
1834         printf( "\b\b\bOK\r\n" );
1835         #endif
1836
1837         return RET_OK;
1838 }
1839
1840 static void
1841 u4_auto_calib_init( void )
1842 {
1843         static const uint32_t SEQ[] = {
1844                 0xb1000000, 0xd1000000, 0xd1000000, 0xd1000000,
1845                 0xd1000000, 0xd1000000, 0xd1000000, 0xd1000000,
1846                 0xd1000000, 0xd1000000, 0xd1000000, 0xd1000000,
1847                 0xd1000000, 0xd1000000, 0xd1000400, 0x00000000,
1848         };
1849
1850         uint64_t i;
1851         uint32_t j;
1852
1853         for( i = MemInit00_R, j = 0; i <= MemInit15_R; i += 0x10, j++ ) {
1854                 store32_ci( i, SEQ[j] );
1855         }
1856
1857 }
1858
#if 0
/*
 * u4_RSL_BLane: (currently compiled out) sweeps the vernier setting for one
 *               bytelane of one rank and reports where it stops working.
 *
 * f_Rank_u32  - rank under test; selects the rank bit
 *               ( 0x00800000 >> rank ) in MemProgCntl
 * f_BLane_u32 - bytelane under test. Lanes 0..15 map in groups of four onto
 *               the C0..C3 register sets, lane 16 is field 4 of C1 and
 *               lane 17 is field 4 of C3. Callers are expected to pass
 *               values below 18.
 *
 * The vernier value is swept upwards from 0x00 to 0xff. For every value the
 * programmed sequence is started via MemProgCntl and the 4 bit result field
 * of the lane is read from the CalC register; the sweep stops at the first
 * value whose result is not 0xa.
 *
 * returns: the last vernier value that still read back 0xa,
 *          (uint32_t) ~0 if already value 0x00 failed,
 *          (uint32_t) ~1 if all 256 values passed
 */
static uint32_t
u4_RSL_BLane( uint32_t f_Rank_u32, uint32_t f_BLane_u32 )
{
        static const uint32_t MemProgCntl_V = (uint32_t) 0x80000500;
        static const uint32_t CalConf0_V    = (uint32_t) 0x0000aa10;
        static const uint32_t MeasPollMax_V = (uint32_t) 0x10000;
        uint32_t l_MemProgCntl_u32;
        uint32_t l_CalConf0_u32;
        uint32_t l_MeasStat_u32;
        uint32_t l_CalC_u32;
        uint32_t l_Poll_u32;
        uint64_t MeasStat_R;
        uint64_t CalC_R;
        uint64_t VerC_R;
        uint32_t shft;
        uint32_t v;

        /*
         * select the register set of the bytelane and reduce the
         * lane number to the field index inside that register set
         */
        if( f_BLane_u32 < 4 ) {
                MeasStat_R   = MeasStatusC0_R;
                CalC_R       = CalC0_R;
                VerC_R       = RstLdEnVerniersC0_R;
        } else if( f_BLane_u32  <  8 ) {
                f_BLane_u32 -= 4;
                MeasStat_R   = MeasStatusC1_R;
                CalC_R       = CalC1_R;
                VerC_R       = RstLdEnVerniersC1_R;
        } else if( f_BLane_u32  < 12 ) {
                f_BLane_u32 -= 8;
                MeasStat_R   = MeasStatusC2_R;
                CalC_R       = CalC2_R;
                VerC_R       = RstLdEnVerniersC2_R;
        } else if( f_BLane_u32 == 16 ) {
                f_BLane_u32  = 4;
                MeasStat_R   = MeasStatusC1_R;
                CalC_R       = CalC1_R;
                VerC_R       = RstLdEnVerniersC1_R;
        } else if( f_BLane_u32 == 17 ) {
                f_BLane_u32  = 4;
                MeasStat_R   = MeasStatusC3_R;
                CalC_R       = CalC3_R;
                VerC_R       = RstLdEnVerniersC3_R;
        } else {
                f_BLane_u32 -= 12;
                MeasStat_R   = MeasStatusC3_R;
                CalC_R       = CalC3_R;
                VerC_R       = RstLdEnVerniersC3_R;
        }

        /*
         * field 0 sits in the topmost nibble of the CalC register
         */
        shft = (uint32_t) 28 - ( f_BLane_u32 * 4 );

        /*
         * start auto calibration logic & wait for completion
         *
         * IBIT(0) is 0x80000000, so the masked value has to be compared
         * against zero (a comparison against 1 can never match and would
         * skip the wait). The poll is bounded: if the bit does not clear
         * in time the code simply carries on instead of hanging.
         */
        or32_ci( MeasStat_R, IBIT(0) );

        l_Poll_u32 = 0;
        do {
                l_MeasStat_u32 = load32_ci( MeasStat_R );
        } while( ( ( l_MeasStat_u32 & IBIT(0) ) != 0 ) &&
                 ( ++l_Poll_u32 < MeasPollMax_V ) );

        l_CalConf0_u32  = CalConf0_V;
        store32_ci( CalConf0_R, l_CalConf0_u32 );

        for( v = 0x000; v < (uint32_t) 0x100; v++ ) {
                /*
                 * same vernier value for both vernier fields
                 */
                store32_ci( VerC_R, ( v << 24 ) | ( v << 16 ) );

                /*
                 * kick off the sequence for the selected rank
                 * and wait until IBIT(1) reports completion
                 */
                l_MemProgCntl_u32  = MemProgCntl_V;
                l_MemProgCntl_u32 |=
                        ( (uint32_t) 0x00800000 >> f_Rank_u32 );
                store32_ci( MemProgCntl_R, l_MemProgCntl_u32 );

                do {
                        l_MemProgCntl_u32 = load32_ci( MemProgCntl_R );
                } while( ( l_MemProgCntl_u32 & IBIT(1) ) == 0 );

                l_CalC_u32 = ( ( load32_ci( CalC_R ) >> shft ) &
                                 (uint32_t) 0xf );

                if( l_CalC_u32 != (uint32_t) 0xa ) {
                        /*
                         * step back to the last good value; for v == 0
                         * this wraps to (uint32_t) ~0 on purpose
                         */
                        v--;
                        break;
                }

        }

        /*
         * the sweep ran through without a single failure
         */
        if( v == (uint32_t) 0x100 ) {
                v = (uint32_t) ~1;
        }

        return v;
}
#endif
1949
1950 static uint32_t
1951 u4_RMDF_BLane( uint32_t f_Rank_u32, uint32_t f_BLane_u32 )
1952 {
1953         static const uint32_t MemProgCntl_V = (uint32_t) 0x80000f00;
1954         static const uint32_t CalConf0_V    = (uint32_t) 0x0000ac10;
1955         uint32_t l_MemProgCntl_u32;
1956         uint32_t l_CalConf0_u32;
1957         uint32_t l_MeasStat_u32;
1958         uint32_t l_CalC_u32;
1959         uint64_t MeasStat_R;
1960         uint64_t CalC_R;
1961         uint64_t VerC_R;
1962         uint32_t shft;
1963         uint32_t v;
1964
1965         if( f_BLane_u32 < 4 ) {
1966                 MeasStat_R   = MeasStatusC0_R;
1967                 CalC_R       = CalC0_R;
1968                 VerC_R       = RstLdEnVerniersC0_R;
1969         } else if( f_BLane_u32  <  8 ) {
1970                 f_BLane_u32 -= 4;
1971                 MeasStat_R   = MeasStatusC1_R;
1972                 CalC_R       = CalC1_R;
1973                 VerC_R       = RstLdEnVerniersC1_R;
1974         } else if( f_BLane_u32  < 12 ) {
1975                 f_BLane_u32 -= 8;
1976                 MeasStat_R   = MeasStatusC2_R;
1977                 CalC_R       = CalC2_R;
1978                 VerC_R       = RstLdEnVerniersC2_R;
1979         } else if( f_BLane_u32 == 16 ) {
1980                 f_BLane_u32  = 4;
1981                 MeasStat_R   = MeasStatusC1_R;
1982                 CalC_R       = CalC1_R;
1983                 VerC_R       = RstLdEnVerniersC1_R;
1984         } else if( f_BLane_u32 == 17 ) {
1985                 f_BLane_u32  = 4;
1986                 MeasStat_R   = MeasStatusC3_R;
1987                 CalC_R       = CalC3_R;
1988                 VerC_R       = RstLdEnVerniersC3_R;
1989         } else {
1990                 f_BLane_u32 -= 12;
1991                 MeasStat_R   = MeasStatusC3_R;
1992                 CalC_R       = CalC3_R;
1993                 VerC_R       = RstLdEnVerniersC3_R;
1994         }
1995
1996         shft = (uint32_t) 28 - ( f_BLane_u32 * 4 );
1997
1998         /*
1999          * start auto calibration logic & wait for completion
2000          */
2001         or32_ci( MeasStat_R, IBIT(0) );
2002
2003         do {
2004                 l_MeasStat_u32 = load32_ci( MeasStat_R );
2005         } while( ( l_MeasStat_u32 & IBIT(0) ) == 1 );
2006
2007         l_CalConf0_u32  = CalConf0_V;
2008         l_CalConf0_u32 |= ( f_BLane_u32 << 5 );
2009         store32_ci( CalConf0_R, l_CalConf0_u32 );
2010
2011         for( v = 0x000; v < (uint32_t) 0x100; v++ ) {
2012                 store32_ci( VerC_R, ( v << 24 ) | ( v << 16 ) );
2013
2014                 l_MemProgCntl_u32  = MemProgCntl_V;
2015                 l_MemProgCntl_u32 |=
2016                         ( (uint32_t) 0x00800000 >> f_Rank_u32 );
2017                 store32_ci( MemProgCntl_R, l_MemProgCntl_u32 );
2018
2019                 do {
2020                         l_MemProgCntl_u32 = load32_ci( MemProgCntl_R );
2021                 } while( ( l_MemProgCntl_u32 & IBIT(1) ) == 0 );
2022
2023                 l_CalC_u32 = ( ( load32_ci( CalC_R ) >> shft ) &
2024                                  (uint32_t) 0xf );
2025
2026                 if( l_CalC_u32 != (uint32_t) 0xa ) {
2027                         v--;
2028                         break;
2029                 }
2030
2031         }
2032
2033         if( v == (uint32_t) 0x100 ) {
2034                 v = (uint32_t) ~1;
2035         }
2036
2037         return v;
2038 }
2039
2040 static int32_t
2041 u4_RMDF_Rank( uint32_t  f_Rank_u32,
2042               uint32_t *f_Buf_pu32 )
2043 {
2044         int32_t  l_Err_pi32 = 0;
2045         uint32_t b;
2046
2047         for( b = 0; ( b < MAX_BLANE ) && ( l_Err_pi32 == 0 ); b++ ) {
2048                 f_Buf_pu32[b] = u4_RMDF_BLane( f_Rank_u32, b );
2049
2050                 if( f_Buf_pu32[b] == (uint32_t) ~0 ) {
2051                         f_Buf_pu32[b] = 0;
2052                         l_Err_pi32++;
2053                 } else if( f_Buf_pu32[b] == (uint32_t) ~1 ) {
2054                         f_Buf_pu32[b] = (uint32_t) 0xff;
2055                         l_Err_pi32++;
2056                 }
2057
2058         }
2059
2060         return l_Err_pi32;
2061 }
2062
2063 static int32_t
2064 u4_auto_calib_MemBus( auto_calib_t *f_ac_pt )
2065 {
2066         uint32_t RdMacDly, RdMacCnt;
2067         uint32_t ResMuxDly, ResMuxCnt;
2068         uint32_t RdPipeDly;
2069         uint32_t l_Buf_pu32[MAX_DRANKS][MAX_BLANE];
2070         uint32_t l_Rnk_pu32[MAX_DRANKS];
2071         uint32_t l_Ver_u32;
2072         int32_t  l_Err_i32;
2073         uint32_t bidx;
2074         uint32_t n, r, b;
2075
2076         /*
2077          * read starting delays out of the MemBus register
2078          */
2079         RdMacDly  = ( load32_ci( MemBusCnfg_R ) >> 28 ) & 0xf;
2080         ResMuxDly = ( load32_ci( MemBusCnfg_R ) >> 24 ) & 0xf;
2081
2082         /*
2083          * initialize ranks as not populated
2084          */
2085         for( r = 0; r < MAX_DRANKS; r++ ) {
2086                 l_Rnk_pu32[r] = 0;
2087         }
2088
2089         /*
2090          * run through every possible delays of
2091          * RdMacDly, ResMuxDly & RdPipeDly until
2092          * the first working configuration is found
2093          */
2094         RdPipeDly = 0;
2095         do {
2096                 and32_ci( MemBusCnfg2_R, ~0x3 );
2097                 or32_ci(  MemBusCnfg2_R, RdPipeDly );
2098
2099                 RdMacCnt  =  RdMacDly;
2100                 ResMuxCnt =  ResMuxDly;
2101
2102                 /*
2103                  * RdMacDly >= ResMuxDly
2104                  */
2105                 do {
2106                         and32_ci( MemBusCnfg_R, ( 1 << 24 ) - 1 );
2107                         or32_ci(  MemBusCnfg_R, ( RdMacCnt  << 28 ) |
2108                                                 ( ResMuxCnt << 24 ) );
2109                         and32_ci( MemBusCnfg2_R, ( 1 << 28 ) - 1 );
2110                         or32_ci(  MemBusCnfg2_R, ( RdMacCnt << 28 ) );
2111
2112                         /*
2113                          * check the current value for every installed
2114                          * DIMM on each side for every bytelane
2115                          */
2116                         l_Err_i32 = 0;
2117                         for( n = 0;
2118                              ( n < NUM_SLOTS ) &&
2119                              ( l_Err_i32 == 0 );
2120                              n += 2 ) {
2121
2122                                 if( m_dimm[n].m_pop_u32 ) {
2123                                         /*
2124                                          * run through all 18 bytelanes of every rank
2125                                          */
2126                                         for( r = n;
2127                                              ( r < n + m_dimm[n].m_rank_u32 ) &&
2128                                              ( l_Err_i32 == 0 );
2129                                              r++ ) {
2130                                                 l_Rnk_pu32[r] = 1;
2131
2132                                                 l_Err_i32 =
2133                                                 u4_RMDF_Rank( r,
2134                                                               &l_Buf_pu32[r][0] );
2135                                         }
2136
2137                                 }
2138
2139                         }
2140
2141                         /*
2142                          * decrementation before exit is wanted!
2143                          */
2144                         RdMacCnt--;
2145                         ResMuxCnt--;
2146                 } while( ( ResMuxCnt  > 0 ) &&
2147                          ( l_Err_i32 != 0 ) );
2148
2149                 if( l_Err_i32 != 0 ) {
2150                         RdPipeDly++;
2151                 }
2152
2153         } while( ( RdPipeDly   < 4 ) &&
2154                  ( l_Err_i32 != 0 ) );
2155
2156         /*
2157          * if l_Err_pi32 == 0 the auto calibration passed ok
2158          */
2159         if( l_Err_i32 != 0 ) {
2160                 return RET_ERR;
2161         }
2162
2163         /*
2164          * insert delay values into return struct
2165          */
2166         and32_ci( MemBusCnfg_R, ( 1 << 24 ) - 1 );
2167         or32_ci(  MemBusCnfg_R, ( RdMacCnt  << 28 ) |
2168                                 ( ResMuxCnt << 24 ) );
2169         and32_ci( MemBusCnfg2_R, ( ( 1 << 28 ) - 1 ) & ~0x3 );
2170         or32_ci(  MemBusCnfg2_R, ( RdMacCnt << 28 ) | RdPipeDly );
2171
2172         f_ac_pt->m_MemBusCnfg_u32  = load32_ci( MemBusCnfg_R );
2173         f_ac_pt->m_MemBusCnfg2_u32 = load32_ci( MemBusCnfg2_R );
2174
2175         /*
2176          * calculate the average vernier setting for the
2177          * bytelanes which share one vernier
2178          */
2179         for( b = 0; b < MAX_BLANE - 2; b += 2 ) {
2180                 n         = 0;
2181                 l_Ver_u32 = 0;
2182
2183                 for( r = 0; r < MAX_DRANKS; r++ ) {
2184                         /*
2185                          * calculation is done or populated ranks only
2186                          */
2187                         if( l_Rnk_pu32[r] != 0 ) {
2188                                 /*
2189                                  * calculate average value
2190                                  */
2191                                 l_Ver_u32 += l_Buf_pu32[r][b];
2192                                 l_Ver_u32 += l_Buf_pu32[r][b+1];
2193                                 n         += 2;
2194
2195                                 if( b == 4 ) {
2196                                         l_Ver_u32 += l_Buf_pu32[r][16];
2197                                         n++;
2198                                 } else if( b == 12 ) {
2199                                         l_Ver_u32 += l_Buf_pu32[r][17];
2200                                         n++;
2201                                 }
2202
2203                         }
2204
2205                 }
2206
2207                 /*
2208                  * average the values
2209                  */
2210                 l_Ver_u32 /= n;
2211
2212                 /*
2213                  * set appropriate vernier register for
2214                  * the current bytelane
2215                  */
2216                 bidx = ( b >> 2 );
2217                 if( ( b & (uint32_t) 0x3 ) == 0 ) {
2218                         l_Ver_u32 <<= 24;
2219                         f_ac_pt->m_RstLdEnVerniers_pu32[bidx]  = l_Ver_u32;
2220                 } else {
2221                         l_Ver_u32 <<= 16;
2222                         f_ac_pt->m_RstLdEnVerniers_pu32[bidx] |= l_Ver_u32;
2223                 }
2224
2225         }
2226
2227         return RET_OK;
2228 }
2229
2230 static int32_t
2231 u4_auto_calib( auto_calib_t *f_ac_pt )
2232 {
2233         uint32_t l_MemBusCnfg_S;
2234         uint32_t l_MemBusCnfg2_S;
2235         uint32_t l_RstLdEnVerniers_S[4];
2236         int32_t  l_Ret_i32;
2237
2238         /*
2239          * save manipulated registers
2240          */
2241         l_MemBusCnfg_S         = load32_ci( MemBusCnfg_R );
2242         l_MemBusCnfg2_S        = load32_ci( MemBusCnfg2_R );
2243         l_RstLdEnVerniers_S[0] = load32_ci( RstLdEnVerniersC0_R );
2244         l_RstLdEnVerniers_S[1] = load32_ci( RstLdEnVerniersC1_R );
2245         l_RstLdEnVerniers_S[2] = load32_ci( RstLdEnVerniersC2_R );
2246         l_RstLdEnVerniers_S[3] = load32_ci( RstLdEnVerniersC3_R );
2247
2248         u4_auto_calib_init();
2249         l_Ret_i32 = u4_auto_calib_MemBus( f_ac_pt );
2250
2251         /*
2252          * restore manipulated registers
2253          */
2254         store32_ci( MemBusCnfg_R,  l_MemBusCnfg_S );
2255         store32_ci( MemBusCnfg2_R, l_MemBusCnfg2_S );
2256         store32_ci( RstLdEnVerniersC0_R, l_RstLdEnVerniers_S[0] );
2257         store32_ci( RstLdEnVerniersC1_R, l_RstLdEnVerniers_S[1] );
2258         store32_ci( RstLdEnVerniersC2_R, l_RstLdEnVerniers_S[2] );
2259         store32_ci( RstLdEnVerniersC3_R, l_RstLdEnVerniers_S[3] );
2260
2261         return l_Ret_i32;
2262 }
2263
2264 static int32_t
2265 u4_checkeccerr( eccerror_t *f_ecc_pt )
2266 {
2267         uint32_t l_val_u32;
2268         int32_t  ret = RET_OK;
2269
2270         l_val_u32   = load32_ci( MESR_R );
2271         l_val_u32 >>= 29;
2272
2273         if( ( l_val_u32 & (uint32_t) 0x7 ) != 0 ) {
2274
2275                 if(        ( l_val_u32 & (uint32_t) 0x4 ) != 0 ) {
2276                         /* UE */
2277                         ret = RET_ACERR_UE;
2278                 } else if( ( l_val_u32 & (uint32_t) 0x1 ) != 0 ) {
2279                         /* UEWT */
2280                         ret = RET_ACERR_UEWT;
2281                 } else {
2282                         /* CE */
2283                         ret = RET_ACERR_CE;
2284                 }
2285
2286         }
2287
2288         f_ecc_pt->m_err_i32   = ret;
2289
2290         l_val_u32             = load32_ci( MEAR1_R );
2291         f_ecc_pt->m_uecnt_u32 = ( ( l_val_u32 >> 24 ) & (uint32_t) 0xff );
2292         f_ecc_pt->m_cecnt_u32 = ( ( l_val_u32 >> 16 ) & (uint32_t) 0xff );
2293
2294         l_val_u32             = load32_ci( MEAR0_R );
2295         f_ecc_pt->m_rank_u32  = ( ( l_val_u32 >> 29 ) & (uint32_t) 0x7 );
2296         f_ecc_pt->m_col_u32   = ( ( l_val_u32 >> 18 ) & (uint32_t) 0x7ff );
2297         f_ecc_pt->m_row_u32   = ( ( l_val_u32 >>  0 ) & (uint32_t) 0x7fff );
2298         f_ecc_pt->m_bank_u32  = ( ( l_val_u32 >> 15 ) & (uint32_t) 0x7 );
2299
2300         return ret;
2301 }
2302
2303 static uint32_t
2304 u4_CalcScrubEnd( void )
2305 {
2306         uint64_t l_scrend_u64 = m_memsize_u64;
2307
2308         /*
2309          * check for memory hole at 2GB
2310          */
2311         if( l_scrend_u64 > _2GB ) {
2312                 l_scrend_u64 += _2GB;
2313         }
2314
2315         l_scrend_u64 -= 0x40;
2316         l_scrend_u64 /= 0x10;
2317
2318         return( (uint32_t) l_scrend_u64 );
2319 }
2320
2321 static int32_t
2322 u4_Scrub( uint32_t f_scrub_u32, uint32_t f_pattern_u32, eccerror_t *f_eccerr_pt )
2323 {
2324         uint32_t i;
2325         int32_t  ret;
2326
2327         /*
2328          * setup scrub parameters
2329          */
2330         store32_ci( MSCR_R, 0 );                        // stop scrub
2331         store32_ci( MSRSR_R, 0x0 );                     // set start
2332         store32_ci( MSRER_R, u4_CalcScrubEnd() );       // set end
2333         store32_ci( MSPR_R, f_pattern_u32 );            // set pattern
2334
2335         /*
2336          * clear out ECC error registers
2337          */
2338         store32_ci( MEAR0_R, 0x0 );
2339         store32_ci( MEAR1_R, 0x0 );
2340         store32_ci( MESR_R, 0x0 );
2341
2342         /*
2343          * Setup Scrub Type
2344          */
2345         store32_ci( MSCR_R, f_scrub_u32 );
2346
2347         if( f_scrub_u32 != BACKGROUND_SCRUB ) {
2348                 /*
2349                  * wait for scrub to complete
2350                  */
2351                 do {
2352                         progbar();
2353                         dly( 15000000 );
2354                         i = load32_ci( MSCR_R );
2355                 } while( ( i & f_scrub_u32 ) != 0 );
2356
2357                 ret = u4_checkeccerr( f_eccerr_pt );
2358         } else {
2359                 ret = RET_OK;
2360         }
2361
2362         return ret;
2363 }
2364
2365 static eccerror_t
2366 u4_InitialScrub( void )
2367 {
2368         eccerror_t l_eccerr_st[2];
2369         int32_t    l_err_i32[2] = { 0, 0 };
2370
2371         l_err_i32[0] = u4_Scrub( IMMEDIATE_SCRUB_WITH_FILL, 0x0, &l_eccerr_st[0] );
2372
2373         if( l_err_i32[0] >= -1 /*CE*/ ) {
2374                 l_err_i32[1] = u4_Scrub( IMMEDIATE_SCRUB, 0x0, &l_eccerr_st[1] );
2375         }
2376
2377         if( l_err_i32[0] < l_err_i32[1] ) {
2378                 return l_eccerr_st[0];
2379         } else {
2380                 return l_eccerr_st[1];
2381         }
2382
2383 }
2384
2385 /*
2386  * RND: calculates Timer cycles from the given frequency
2387  *      divided by the clock frequency. Values are rounded
2388  *      up to the nearest integer value if the division is not even.
2389  */
2390 #define RND( tXXX )     ( ( ( tXXX ) + tCK - 1 ) / tCK )
2391
2392 static void
2393 u4_MemInitSequence( uint32_t tRP, uint32_t tWR, uint32_t tRFC, uint32_t CL,
2394                     uint32_t tCK, uint32_t TD )
2395 {
2396         /*
2397          * DIMM init sequence
2398          */
2399         static const uint32_t INI_SEQ[] = {
2400                 0xa0000400, 0x80020000, 0x80030000, 0x80010404,
2401                 0x8000100a, 0xa0000400, 0x90000000, 0x90000000,
2402                 0x8ff0100a, 0x80010784, 0x80010404, 0x00000000,
2403                 0x00000000, 0x00000000, 0x00000000, 0x00000000
2404         };
2405
2406         uint32_t l_MemInit_u32;
2407         uint64_t r;
2408         uint32_t i;
2409
2410         for( r = MemInit00_R, i = 0; r <= MemInit15_R; r += 0x10, i++ ) {
2411                 l_MemInit_u32 = INI_SEQ[i];
2412
2413                 switch( i ) {
2414                         case 0:
2415                         case 5: {
2416                                 l_MemInit_u32 |= ( ( RND( tRP ) - TD )  << 20 );
2417                                 break;
2418                         }
2419                         case 3: {
2420                                 store32_ci( EMRSRegCntl_R, l_MemInit_u32 &
2421                                                            (uint32_t) 0xffff );
2422                                 break;
2423                         }
2424                         case 4: {
2425                                 l_MemInit_u32 |= IBIT(23);
2426                         }
2427                         case 8: {
2428                                 l_MemInit_u32 |= ( ( RND( tWR ) - 1 )  <<  9 );
2429                                 l_MemInit_u32 |= ( CL                  <<  4 );
2430
2431                                 store32_ci( MRSRegCntl_R, l_MemInit_u32 &
2432                                                           (uint32_t) 0xffff );
2433                                 break;
2434                         }
2435                         case 6:
2436                         case 7: {
2437                                 l_MemInit_u32 |= ( ( RND( tRFC ) - TD ) << 20 );
2438                                 break;
2439                         }
2440
2441                 }
2442
2443                 store32_ci( r, l_MemInit_u32 );
2444
2445 #ifdef U4_SHOW_REGS
2446                 printf( "\r\nMemInit%02d (0x%04X): 0x%08X", i, (uint16_t) r, l_MemInit_u32 );
2447 #endif
2448         }
2449 #ifdef U4_SHOW_REGS
2450         printf( "\r\n" );
2451 #endif
2452         /*
2453          * Kick off memory init sequence & wait for completion
2454          */
2455         store32_ci( MemProgCntl_R, IBIT(0) );
2456
2457         do {
2458                 i = load32_ci( MemProgCntl_R );
2459         } while( ( i & IBIT(1) ) == 0 );
2460
2461 }
2462
/*
 * static DIMM configuration settings
 *
 * reg_statics_maui holds one set of register values per speed index
 * (NUM_SPEED_IDX entries), ordered 400 MHz, 533 MHz, 667 MHz.
 * Judging by its name this is the table for the Maui board layout.
 *
 * MemBusCnfg is composed of a speed specific constant and of two delay
 * values derived from MAX_RMD, placed at bit 28 and at bit 24.
 * ByteWrClkDel and ReadStrobeDel carry 18 values each.
 */
static reg_statics_t reg_statics_maui[NUM_SPEED_IDX] = {
        {       /* 400 MHz */
                .RRMux          = 1,
                .WRMux          = 1,
                .WWMux          = 1,
                .RWMux          = 1,

                .MemRdQCnfg     = 0x20020820,
                .MemWrQCnfg     = 0x40041040,
                .MemQArb        = 0x00000000,
                .MemRWArb       = 0x30413cc0,

                .ODTCntl        = 0x60000000,
                .IOPadCntl      = 0x001a4000,
                .MemPhyModeCntl = 0x00000000,
                .OCDCalCntl     = 0x00000000,
                .OCDCalCmd      = 0x00000000,

                .CKDelayL       = 0x34000000,
                .CKDelayU       = 0x34000000,

                .MemBusCnfg     = 0x00000050                  |
                                  ( (   MAX_RMD       << 28 ) |
                                    ( ( MAX_RMD - 2 ) << 24 ) ),

                .CAS1Dly0       = 0,
                .CAS1Dly1       = 0,

                .ByteWrClkDel   = {
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000
                },
                .ReadStrobeDel  = {
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000
                }

        },
        {       /* 533 MHz */
                .RRMux          = 1,
                .WRMux          = 1,
                .WWMux          = 1,
                .RWMux          = 1,

                .MemRdQCnfg     = 0x20020820,
                .MemWrQCnfg     = 0x40041040,
                .MemQArb        = 0x00000000,
                .MemRWArb       = 0x30413cc0,

                .ODTCntl        = 0x60000000,
                .IOPadCntl      = 0x001a4000,
                .MemPhyModeCntl = 0x00000000,
                .OCDCalCntl     = 0x00000000,
                .OCDCalCmd      = 0x00000000,

                .CKDelayL       = 0x18000000,
                .CKDelayU       = 0x18000000,

                .MemBusCnfg     = 0x00002070                  |
                                  ( (   MAX_RMD       << 28 ) |
                                    ( ( MAX_RMD - 3 ) << 24 ) ),

                .CAS1Dly0       = 0,
                .CAS1Dly1       = 0,

                .ByteWrClkDel   = {

                        0x12000000, 0x12000000, 0x12000000 , 0x12000000,
                        0x12000000, 0x12000000, 0x12000000 , 0x12000000,
                        0x12000000, 0x12000000, 0x12000000 , 0x12000000,
                        0x12000000, 0x12000000, 0x12000000 , 0x12000000,
                        0x12000000, 0x12000000
                },
                .ReadStrobeDel  = {
                        0x00000000, 0x00000000, 0x00000000 , 0x00000000,
                        0x00000000, 0x00000000, 0x00000000 , 0x00000000,
                        0x00000000, 0x00000000, 0x00000000 , 0x00000000,
                        0x00000000, 0x00000000, 0x00000000 , 0x00000000,
                        0x00000000, 0x00000000
                }

        },
        {       /* 667 MHz */
                .RRMux          = 1,
                .WRMux          = 1,
                .WWMux          = 1,
                .RWMux          = 3,

                .MemRdQCnfg     = 0x20020820,
                .MemWrQCnfg     = 0x40041040,
                .MemQArb        = 0x00000000,
                .MemRWArb       = 0x30413cc0,

                .ODTCntl        = 0x60000000,
                .IOPadCntl      = 0x001a4000,
                .MemPhyModeCntl = 0x00000000,
                .OCDCalCntl     = 0x00000000,
                .OCDCalCmd      = 0x00000000,

                .CKDelayL       = 0x0a000000,
                .CKDelayU       = 0x0a000000,

                .MemBusCnfg     = 0x000040a0                  |
                                  ( (   MAX_RMD       << 28 ) |
                                    ( ( MAX_RMD - 3 ) << 24 ) ),

                .CAS1Dly0       = 2,
                .CAS1Dly1       = 2,

                .ByteWrClkDel   = {

                        0x12000000, 0x12000000, 0x12000000, 0x12000000,
                        0x12000000, 0x12000000, 0x12000000, 0x12000000,
                        0x12000000, 0x12000000, 0x12000000, 0x12000000,
                        0x12000000, 0x12000000, 0x12000000, 0x12000000,
                        0x12000000, 0x12000000
/*
                        alternative setting, currently not in use:

                        0x31000000, 0x31000000, 0x31000000, 0x31000000,
                        0x31000000, 0x31000000, 0x31000000, 0x31000000,
                        0x31000000, 0x31000000, 0x31000000, 0x31000000,
                        0x31000000, 0x31000000, 0x31000000, 0x31000000,
                        0x31000000, 0x31000000
*/
                },
                .ReadStrobeDel  = {
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000, 0x00000000, 0x00000000,
                        0x00000000, 0x00000000
                }

        }
};
2606
2607 static reg_statics_t reg_statics_bimini[NUM_SPEED_IDX] = {
2608         {       /* 400 Mhz */
2609                 .RRMux          = 2,
2610                 .WRMux          = 2,
2611                 .WWMux          = 2,
2612                 .RWMux          = 2,
2613
2614                 .MemRdQCnfg     = 0x20020820,
2615                 .MemWrQCnfg     = 0x40041040,
2616                 .MemQArb        = 0x00000000,
2617                 .MemRWArb       = 0x30413cc0,
2618
2619                 .ODTCntl        = 0x40000000,
2620                 .IOPadCntl      = 0x001a4000,
2621                 .MemPhyModeCntl = 0x00000000,
2622                 .OCDCalCntl     = 0x00000000,
2623                 .OCDCalCmd      = 0x00000000,
2624
2625                 .CKDelayL       = 0x00000000,
2626                 .CKDelayU       = 0x28000000,
2627
2628                 .MemBusCnfg     = 0x00552070                  |
2629                                   ( (   MAX_RMD       << 28 ) |
2630                                     ( ( MAX_RMD - 2 ) << 24 ) ),
2631
2632                 .CAS1Dly0       = 0,
2633                 .CAS1Dly1       = 0,
2634
2635                 .ByteWrClkDel   = {
2636                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2637                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2638                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2639                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2640                         0x00000000, 0x00000000
2641                 },
2642                 .ReadStrobeDel  = {
2643                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2644                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2645                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2646                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2647                         0x00000000, 0x00000000
2648                 }
2649
2650         },
2651         {       /* 533 Mhz */
2652                 .RRMux          = 3,
2653                 .WRMux          = 3,
2654                 .WWMux          = 3,
2655                 .RWMux          = 3,
2656
2657                 .MemRdQCnfg     = 0x20020820,
2658                 .MemWrQCnfg     = 0x40041040,
2659                 .MemQArb        = 0x00000000,
2660                 .MemRWArb       = 0x30413cc0,
2661
2662                 .ODTCntl        = 0x40000000,
2663                 .IOPadCntl      = 0x001a4000,
2664                 .MemPhyModeCntl = 0x00000000,
2665                 .OCDCalCntl     = 0x00000000,
2666                 .OCDCalCmd      = 0x00000000,
2667
2668                 .CKDelayL       = 0x00000000,
2669                 .CKDelayU       = 0x20000000,
2670
2671                 .MemBusCnfg     = 0x00644190                  |
2672                                   ( (   MAX_RMD       << 28 ) |
2673                                     ( ( MAX_RMD - 3 ) << 24 ) ),
2674
2675                 .CAS1Dly0       = 2,
2676                 .CAS1Dly1       = 2,
2677
2678                 .ByteWrClkDel   = {
2679                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2680                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2681                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2682                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2683                         0x14000000, 0x14000000
2684                 },
2685                 .ReadStrobeDel  = {
2686                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2687                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2688                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2689                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2690                         0x00000000, 0x00000000
2691                 }
2692
2693         },
2694         {       /* 667 Mhz */
2695                 .RRMux          = 3,
2696                 .WRMux          = 3,
2697                 .WWMux          = 3,
2698                 .RWMux          = 3,
2699
2700                 .MemRdQCnfg     = 0x20020820,
2701                 .MemWrQCnfg     = 0x40041040,
2702                 .MemQArb        = 0x00000000,
2703                 .MemRWArb       = 0x30413cc0,
2704
2705                 .ODTCntl        = 0x40000000,
2706                 .IOPadCntl      = 0x001a4000,
2707                 .MemPhyModeCntl = 0x00000000,
2708                 .OCDCalCntl     = 0x00000000,
2709                 .OCDCalCmd      = 0x00000000,
2710
2711                 .CKDelayL       = 0x00000000,
2712                 .CKDelayU       = 0x00000000,
2713
2714                 .MemBusCnfg     = 0x00666270                  |
2715                                   ( (   MAX_RMD       << 28 ) |
2716                                     ( ( MAX_RMD - 3 ) << 24 ) ),
2717
2718                 .CAS1Dly0       = 2,
2719                 .CAS1Dly1       = 2,
2720
2721                 .ByteWrClkDel   = {
2722                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2723                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2724                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2725                         0x14000000, 0x14000000, 0x14000000, 0x14000000,
2726                         0x14000000, 0x14000000
2727                 },
2728                 .ReadStrobeDel  = {
2729                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2730                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2731                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2732                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2733                         0x00000000, 0x00000000
2734                 }
2735
2736         }
2737 };
2738
2739 static reg_statics_t reg_statics_kauai[NUM_SPEED_IDX] = {
2740         {       /* 400 Mhz */
2741                 .RRMux          = 0,
2742                 .WRMux          = 0,
2743                 .WWMux          = 0,
2744                 .RWMux          = 0,
2745
2746                 .MemRdQCnfg     = 0,
2747                 .MemWrQCnfg     = 0,
2748                 .MemQArb        = 0,
2749                 .MemRWArb       = 0,
2750
2751                 .ODTCntl        = 0,
2752                 .IOPadCntl      = 0,
2753                 .MemPhyModeCntl = 0,
2754                 .OCDCalCntl     = 0,
2755                 .OCDCalCmd      = 0,
2756
2757                 .CKDelayL       = 0,
2758                 .CKDelayU       = 0,
2759
2760                 .MemBusCnfg     = 0,
2761
2762                 .CAS1Dly0       = 0,
2763                 .CAS1Dly1       = 0,
2764
2765                 .ByteWrClkDel   = {
2766                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2767                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2768                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2769                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2770                         0x00000000, 0x00000000
2771                 },
2772                 .ReadStrobeDel  = {
2773                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2774                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2775                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2776                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2777                         0x00000000, 0x00000000
2778                 }
2779
2780         },
2781         {       /* 533 Mhz */
2782                 .RRMux          = 0,
2783                 .WRMux          = 0,
2784                 .WWMux          = 0,
2785                 .RWMux          = 0,
2786
2787                 .MemRdQCnfg     = 0,
2788                 .MemWrQCnfg     = 0,
2789                 .MemQArb        = 0,
2790                 .MemRWArb       = 0,
2791
2792                 .ODTCntl        = 0,
2793                 .IOPadCntl      = 0,
2794                 .MemPhyModeCntl = 0,
2795                 .OCDCalCntl     = 0,
2796                 .OCDCalCmd      = 0,
2797
2798                 .CKDelayL       = 0,
2799                 .CKDelayU       = 0,
2800
2801                 .MemBusCnfg     = 0,
2802
2803                 .CAS1Dly0       = 0,
2804                 .CAS1Dly1       = 0,
2805
2806                 .ByteWrClkDel   = {
2807                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2808                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2809                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2810                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2811                         0x00000000, 0x00000000
2812                 },
2813                 .ReadStrobeDel  = {
2814                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2815                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2816                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2817                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2818                         0x00000000, 0x00000000
2819                 }
2820
2821         },
2822         {       /* 667 Mhz */
2823                 .RRMux          = 0,
2824                 .WRMux          = 0,
2825                 .WWMux          = 0,
2826                 .RWMux          = 0,
2827
2828                 .MemRdQCnfg     = 0,
2829                 .MemWrQCnfg     = 0,
2830                 .MemQArb        = 0,
2831                 .MemRWArb       = 0,
2832
2833                 .ODTCntl        = 0,
2834                 .IOPadCntl      = 0,
2835                 .MemPhyModeCntl = 0,
2836                 .OCDCalCntl     = 0,
2837                 .OCDCalCmd      = 0,
2838
2839                 .CKDelayL       = 0,
2840                 .CKDelayU       = 0,
2841
2842                 .MemBusCnfg     = 0,
2843
2844                 .CAS1Dly0       = 0,
2845                 .CAS1Dly1       = 0,
2846
2847                 .ByteWrClkDel   = {
2848                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2849                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2850                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2851                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2852                         0x00000000, 0x00000000
2853                 },
2854                 .ReadStrobeDel  = {
2855                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2856                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2857                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2858                         0x00000000, 0x00000000, 0x00000000, 0x00000000,
2859                         0x00000000, 0x00000000
2860                 }
2861
2862         }
2863 };
2864
2865 static int32_t
2866 u4_start( eccerror_t *f_ecc_pt )
2867 {
2868         /*
2869          * maximum runs for auto calibration
2870          */
2871         static const uint32_t MAX_ACERR = (uint32_t) 5;
2872
2873         /*
2874          * fixed u4/DIMM timer/timing values for calculation
2875          */
2876         static const uint32_t TD      = (uint32_t) 2;   // u4 delay cycles for loading a timer
2877         static const uint32_t AL      = (uint32_t) 0;   // additional latency (fix)
2878         static const uint32_t BL      = (uint32_t) 4;   // burst length (fix)
2879
2880         uint32_t              SPEED   = m_gendimm.m_speed_pu32[m_dclidx_u32];
2881         uint32_t              CL      = m_gendimm.m_clval_pu32[m_dclidx_u32];
2882         uint32_t              RL      = AL + CL;
2883         uint32_t              WL      = RL - 1;
2884         uint32_t              tCK     = m_gendimm.m_tCK_pu32[m_dclidx_u32];
2885         uint32_t              tRAS    = m_gendimm.m_tRAS_u32;
2886         uint32_t              tRTP    = m_gendimm.m_tRTP_u32;
2887         uint32_t              tRP     = m_gendimm.m_tRP_u32;
2888         uint32_t              tWR     = m_gendimm.m_tWR_u32;
2889         uint32_t              tRRD    = m_gendimm.m_tRRD_u32;
2890         uint32_t              tRC     = m_gendimm.m_tRC_u32;
2891         uint32_t              tRCD    = m_gendimm.m_tRCD_u32;
2892         uint32_t              tWTR    = m_gendimm.m_tWTR_u32;
2893         uint32_t              tRFC    = m_gendimm.m_tRFC_u32;
2894         uint32_t              tREF    = m_gendimm.m_tREF_u32;
2895
2896         reg_statics_t *rst = 0;
2897
2898         uint32_t       l_RAS0_u32;
2899         uint32_t       l_RAS1_u32;
2900         uint32_t       l_CAS0_u32;
2901         uint32_t       l_CAS1_u32;
2902         uint32_t       l_MemRfshCntl_u32;
2903         uint32_t       l_UsrCnfg_u32;
2904         uint32_t       l_DmCnfg_u32;
2905
2906         uint32_t       l_MemArbWt_u32;
2907         uint32_t       l_MemRWArb_u32;
2908         uint32_t       l_MemBusCnfg_u32;
2909
2910         auto_calib_t   l_ac_st;
2911         int32_t        l_ac_i32;
2912         uint32_t       l_acerr_i32;
2913         uint32_t       sidx;
2914         uint32_t       i, j, t0, t1;
2915
2916         /*
2917          * set index for different 400/533/667 Mhz setup
2918          */
2919         switch( SPEED ) {
2920                 case 400:
2921                 case 533:
2922                 case 667: {
2923                         sidx  = SPEED;
2924                         sidx -= 400;
2925                         sidx /= 133;
2926                         break;
2927                 }
2928
2929                 default: {
2930                         #ifdef U4_DEBUG2
2931                         printf( "\r\n-> DIMM speed of %03u not supported\r\n",
2932                                 m_gendimm.m_speed_pu32[m_dclidx_u32]  );
2933                         #endif
2934                         return RET_ERR;
2935                 }
2936
2937         }
2938
2939         /*
2940          * setup pointer to the static register settings
2941          */
2942         if( IS_MAUI ) {
2943                 rst = &reg_statics_maui[sidx];
2944         } else if( IS_BIMINI ) {
2945                 rst = &reg_statics_bimini[sidx];
2946         } else if( IS_KAUAI ) {
2947                 rst = &reg_statics_kauai[sidx];
2948         }
2949
2950         /*
2951          * Switch off Fast Path by default for all DIMMs
2952          * running with more than 400Mhz
2953          */
2954         if( SPEED == 400 ) {
2955                 or32_ci( APIMemRdCfg_R, IBIT(30) );
2956                 #ifdef U4_INFO
2957                 printf( "  [fastpath        :        ON]\r\n" );
2958                 #endif
2959         } else {
2960                 and32_ci( APIMemRdCfg_R, ~IBIT(30) );
2961                 #ifdef U4_INFO
2962                 printf( "  [fastpath        :       OFF]\r\n" );
2963                 #endif
2964         }
2965
2966
2967         #ifdef U4_INFO
2968         printf( "  [register setup  :          ]" );
2969         #endif
2970
2971         /*
2972          * setup RAS/CAS timers2
2973          * NOTE: subtract TD from all values because of the delay
2974          * caused by reloading timers (see spec)
2975          */
2976
2977         /*
2978          * RAS Timer 0
2979          */
2980         // TiAtP = RND(tRAS) -> RAS0[0:4]
2981         l_RAS0_u32  = ( ( RND( tRAS )                           - TD ) << 27 );
2982         // TiRtP = AL + BL/2 - 2 + RND(tRTP) -> RAS01[5:9]
2983         l_RAS0_u32 |= ( ( AL + BL/2 - 2 + RND( tRTP )           - TD ) << 22 );
2984         // TiWtP = WL + BL/2 + RND(tWR) -> RAS0[10:14]
2985         l_RAS0_u32 |= ( ( WL + BL/2 + RND( tWR )                - TD ) << 17 );
2986         // TiPtA = RND(tRP) -> RAS0[15:19]
2987         l_RAS0_u32 |= ( ( RND( tRP )                            - TD ) << 12 );
2988         // TiPAtA = RND(tRP) or
2989         //          RND(tRP) + 1 for 8 bank devices -> RAS0[20:24]
2990         if( m_gendimm.m_bankcnt_u32 <= 4 ) {
2991                 l_RAS0_u32 |= ( ( RND( tRP )                    - TD ) <<  7 );
2992         } else {
2993                 l_RAS0_u32 |= ( ( RND( tRP ) + 1                - TD ) <<  7 );
2994         }
2995
2996         /*
2997          * RAS Timer 1
2998          */
2999         // TiRAPtA = AL + BL/2 - 2 + RND(tRTP + tRP) -> RAS1[0:4]
3000         l_RAS1_u32  = ( ( AL + BL/2 - 2 + RND( tRTP + tRP )     - TD ) << 27 );
3001         // TiWAPtA = CL + AL + BL/2 - 1 + RND(tWR + tRP) -> RAS1[5:9]
3002         l_RAS1_u32 |= ( ( CL + AL + BL/2 - 1 + RND( tWR + tRP ) - TD ) << 22 );
3003         // TiAtARk = tRRD -> RAS1[10:14]
3004         l_RAS1_u32 |= ( ( RND( tRRD )                           - TD ) << 17 );
3005         // TiAtABk = tRC -> RAS1[15:19]
3006         l_RAS1_u32 |= ( ( RND( tRC )                            - TD ) << 12 );
3007         // TiAtRW = tRCD -> RAS1[20:24]
3008         l_RAS1_u32 |= ( ( RND( tRCD )                           - TD ) <<  7 );
3009         // TiSAtARk Win = 4 * tRRD + 2 -> RAS1[25:29]
3010         l_RAS1_u32 |= ( ( RND( 4 * tRRD ) + 2                   - TD ) <<  2 );
3011
3012         /*
3013          * CAS Timer 0
3014          */
3015         // TiRtRRk = BL/2 -> CAS0[0:4]
3016         l_CAS0_u32  = ( ( BL/2                                  - TD ) << 27 );
3017         // TiRtRDm = BL/2 + 1 -> CAS0[5:9]
3018         l_CAS0_u32 |= ( ( BL/2 + 1                              - TD ) << 22 );
3019         // TiRtRSy = BL/2 + RRMux -> CAS0[10:14]
3020         l_CAS0_u32 |= ( ( BL/2 + rst->RRMux                     - TD ) << 17 );
3021         // TiWtRRk = CL - 1 + BL/2 + tWTR ->CAS0[15:19]
3022         l_CAS0_u32 |= ( ( CL - 1 + BL/2 + RND( tWTR )           - TD ) << 12 );
3023         // TiWtRDm = BL/2 + 1 -> CAS0[20:24]
3024         l_CAS0_u32 |= ( ( BL/2 + 1                              - TD ) <<  7 );
3025         // TiWtRSy = BL/2 + WRMux -> CAS0[25:29]
3026         l_CAS0_u32 |= ( ( BL/2 + rst->WRMux                     - TD ) <<  2 );
3027
3028         /*
3029          * CAS Timer 1
3030          */
3031         // TiWtWRk = BL/2 -> CAS1[0:4]
3032         l_CAS1_u32  = ( ( BL/2                                  - TD ) << 27 );
3033         // TiWtWDm = BL/2 + 1 -> CAS1[5:9]
3034         l_CAS1_u32 |= ( ( BL/2 + 1                              - TD ) << 22 );
3035         // TiWtWSy = BL/2 + WWMux -> CAS1[10:14]
3036         l_CAS1_u32 |= ( ( BL/2 + rst->WWMux                     - TD ) << 17 );
3037         // TiRtWRk = BL/2 + 2 -> CAS1[15:19]
3038         l_CAS1_u32 |= ( ( BL/2 + 2            + rst->CAS1Dly0   - TD ) << 12 );
3039         // TiRtWDm = BL/2 + 2 -> CAS1[20:24]
3040         l_CAS1_u32 |= ( ( BL/2 + 2            + rst->CAS1Dly1   - TD ) <<  7 );
3041         // TiRtWSy = BL/2 + RWMux + 1 -> CAS1[25:29]
3042         l_CAS1_u32 |= ( ( BL/2 + rst->RWMux + 1                 - TD ) <<  2 );
3043
3044         store32_ci( RASTimer0_R, l_RAS0_u32 );
3045         store32_ci( RASTimer1_R, l_RAS1_u32 );
3046         store32_ci( CASTimer0_R, l_CAS0_u32 );
3047         store32_ci( CASTimer1_R, l_CAS1_u32 );
3048
3049         /*
3050          * Mem Refresh Control register
3051          */
3052         l_MemRfshCntl_u32  = ( ( ( tREF / tCK ) / 16 ) << 23 );
3053         l_MemRfshCntl_u32 |= ( ( RND( tRFC )    - TD ) <<  8 );
3054         store32_ci( MemRfshCntl_R, l_MemRfshCntl_u32 );
3055
3056         /*
3057          * setup DmXCnfg registers
3058          */
3059         store32_ci( Dm0Cnfg_R, (uint32_t) 0x0 );
3060         store32_ci( Dm1Cnfg_R, (uint32_t) 0x0 );
3061         store32_ci( Dm2Cnfg_R, (uint32_t) 0x0 );
3062         store32_ci( Dm3Cnfg_R, (uint32_t) 0x0 );
3063
3064         /*
3065          * create DmCnfg & UsrCnfg values out of group data
3066          */
3067         l_UsrCnfg_u32 = 0;
3068         for( i = 0; i < m_dgrcnt_u32; i++ ) {
3069                 l_DmCnfg_u32  = ( m_dgrptr[i]->m_add2g_u32 << 27 );
3070                 l_DmCnfg_u32 |= ( m_dgrptr[i]->m_sub2g_u32 << 19 );
3071                 l_DmCnfg_u32 |= ( m_dgrptr[i]->m_memmd_u32 << 12 );
3072                 l_DmCnfg_u32 |= ( m_dgrptr[i]->m_start_u32 <<  3 );
3073                 l_DmCnfg_u32 |= ( m_dgrptr[i]->m_ss_u32    <<  1 );
3074                 l_DmCnfg_u32 |= IBIT(31);       // enable bit
3075
3076                 /*
3077                  * write value into DmXCnfg registers
3078                  */
3079                 for( j = 0; j < m_dgrptr[i]->m_dcnt_u32; j++ ) {
3080                         t0 = m_dgrptr[i]->m_dptr[j]->m_bank_u32;
3081                         t1 = Dm0Cnfg_R + 0x10 * t0;
3082
3083                         if( load32_ci( t1 ) == 0 ) {
3084                                 store32_ci( t1, l_DmCnfg_u32 );
3085                                 l_UsrCnfg_u32 |=
3086                                 ( m_dgrptr[i]->m_csmode_u32 << ( 30 - 2 * t0 ) );
3087                         }
3088
3089                 }
3090
3091         }
3092
3093         /*
3094          * setup UsrCnfg register
3095          *- cs mode is selected above
3096          *- Interleave on L2 cache line
3097          *- Usually closed page policy
3098          */
3099         l_UsrCnfg_u32 |=  IBIT(8);      // interleave on L2 cache line
3100         l_UsrCnfg_u32 &= ~IBIT(9);      // usually closed
3101         l_UsrCnfg_u32 |=  IBIT(10);
3102         store32_ci( UsrCnfg_R, l_UsrCnfg_u32 );
3103
3104         /*
3105          * Memory Arbiter Weight Register
3106          */
3107         // CohWt  -> MemAWt[0:1]
3108         l_MemArbWt_u32  = ( (uint32_t) 1 << 30 );
3109         // NCohWt -> MemAWt[2:3]
3110         l_MemArbWt_u32 |= ( (uint32_t) 1 << 28 );
3111         // ScrbWt -> MemAWt[4:5]
3112         l_MemArbWt_u32 |= ( (uint32_t) 0 << 26 );
3113         store32_ci( MemArbWt_R, l_MemArbWt_u32 );
3114
3115         /*
3116          * misc fixed register setup
3117          */
3118         store32_ci( ODTCntl_R,        rst->ODTCntl );
3119         store32_ci( IOPadCntl_R,      rst->IOPadCntl );
3120         store32_ci( MemPhyModeCntl_R, rst->MemPhyModeCntl );
3121         store32_ci( OCDCalCntl_R,     rst->OCDCalCntl );
3122         store32_ci( OCDCalCmd_R,      rst->OCDCalCmd );
3123
3124         /*
3125          * CK Delay registers
3126          */
3127         store32_ci( CKDelayL_R, rst->CKDelayL );
3128         store32_ci( CKDelayU_R, rst->CKDelayU );
3129
3130         /*
3131          * read/write strobe delays
3132          */
3133         store32_ci( ByteWrClkDelC0B00_R, rst->ByteWrClkDel[ 0] );
3134         store32_ci( ByteWrClkDelC0B01_R, rst->ByteWrClkDel[ 1] );
3135         store32_ci( ByteWrClkDelC0B02_R, rst->ByteWrClkDel[ 2] );
3136         store32_ci( ByteWrClkDelC0B03_R, rst->ByteWrClkDel[ 3] );
3137         store32_ci( ByteWrClkDelC0B04_R, rst->ByteWrClkDel[ 4] );
3138         store32_ci( ByteWrClkDelC0B05_R, rst->ByteWrClkDel[ 5] );
3139         store32_ci( ByteWrClkDelC0B06_R, rst->ByteWrClkDel[ 6] );
3140         store32_ci( ByteWrClkDelC0B07_R, rst->ByteWrClkDel[ 7] );
3141         store32_ci( ByteWrClkDelC0B16_R, rst->ByteWrClkDel[16] );
3142         store32_ci( ByteWrClkDelC0B08_R, rst->ByteWrClkDel[ 8] );
3143         store32_ci( ByteWrClkDelC0B09_R, rst->ByteWrClkDel[ 9] );
3144         store32_ci( ByteWrClkDelC0B10_R, rst->ByteWrClkDel[10] );
3145         store32_ci( ByteWrClkDelC0B11_R, rst->ByteWrClkDel[11] );
3146         store32_ci( ByteWrClkDelC0B12_R, rst->ByteWrClkDel[12] );
3147         store32_ci( ByteWrClkDelC0B13_R, rst->ByteWrClkDel[13] );
3148         store32_ci( ByteWrClkDelC0B14_R, rst->ByteWrClkDel[14] );
3149         store32_ci( ByteWrClkDelC0B15_R, rst->ByteWrClkDel[15] );
3150         store32_ci( ByteWrClkDelC0B17_R, rst->ByteWrClkDel[17] );
3151         store32_ci( ReadStrobeDelC0B00_R, rst->ReadStrobeDel[ 0] );
3152         store32_ci( ReadStrobeDelC0B01_R, rst->ReadStrobeDel[ 1] );
3153         store32_ci( ReadStrobeDelC0B02_R, rst->ReadStrobeDel[ 2] );
3154         store32_ci( ReadStrobeDelC0B03_R, rst->ReadStrobeDel[ 3] );
3155         store32_ci( ReadStrobeDelC0B04_R, rst->ReadStrobeDel[ 4] );
3156         store32_ci( ReadStrobeDelC0B05_R, rst->ReadStrobeDel[ 5] );
3157         store32_ci( ReadStrobeDelC0B06_R, rst->ReadStrobeDel[ 6] );
3158         store32_ci( ReadStrobeDelC0B07_R, rst->ReadStrobeDel[ 7] );
3159         store32_ci( ReadStrobeDelC0B16_R, rst->ReadStrobeDel[16] );
3160         store32_ci( ReadStrobeDelC0B08_R, rst->ReadStrobeDel[ 8] );
3161         store32_ci( ReadStrobeDelC0B09_R, rst->ReadStrobeDel[ 9] );
3162         store32_ci( ReadStrobeDelC0B10_R, rst->ReadStrobeDel[10] );
3163         store32_ci( ReadStrobeDelC0B11_R, rst->ReadStrobeDel[11] );
3164         store32_ci( ReadStrobeDelC0B12_R, rst->ReadStrobeDel[12] );
3165         store32_ci( ReadStrobeDelC0B13_R, rst->ReadStrobeDel[13] );
3166         store32_ci( ReadStrobeDelC0B14_R, rst->ReadStrobeDel[14] );
3167         store32_ci( ReadStrobeDelC0B15_R, rst->ReadStrobeDel[15] );
3168         store32_ci( ReadStrobeDelC0B17_R, rst->ReadStrobeDel[17] );
3169
3170         /*
3171          * Mem Bus Configuration
3172          * initial setup used in auto calibration
3173          * final values will be written after
3174          * auto calibration has finished
3175          */
3176         l_MemBusCnfg_u32  = rst->MemBusCnfg;
3177
3178 /*      values calculation has been dropped, static values are used instead
3179         // WdbRqDly = 2 * (CL - 3) (registered DIMMs) -> MBC[16:19]
3180         l_MemBusCnfg_u32 += ( ( 2 * ( CL - 3 ) ) << 12 );
3181         // RdOEOnDly = 0 (typically)
3182         l_MemBusCnfg_u32 += ( ( 0 )              <<  8 );
3183         // RdOEOffDly = (2 * CL) - 4 -> MBC[24:27]
3184         // NOTE: formula is not working, changed to:
3185         // RdOEOffDly = (2 * CL) - 1
3186         l_MemBusCnfg_u32 += ( ( ( 2 * CL ) - 1 ) <<  4 );
3187 */
3188
3189         store32_ci( MemBusCnfg_R, l_MemBusCnfg_u32 );
3190         store32_ci( MemBusCnfg2_R, rst->MemBusCnfg & (uint32_t) 0xf0000000 );
3191
3192         /*
3193          * reset verniers registers
3194          */
3195         store32_ci( RstLdEnVerniersC0_R, 0x0 );
3196         store32_ci( RstLdEnVerniersC1_R, 0x0 );
3197         store32_ci( RstLdEnVerniersC2_R, 0x0 );
3198         store32_ci( RstLdEnVerniersC3_R, 0x0 );
3199         store32_ci( ExtMuxVernier0_R,    0x0 );
3200         store32_ci( ExtMuxVernier1_R,    0x0 );
3201
3202         /*
3203          * Queue Configuration
3204          */
3205         store32_ci( MemRdQCnfg_R, rst->MemRdQCnfg );
3206         store32_ci( MemWrQCnfg_R, rst->MemWrQCnfg );
3207         store32_ci( MemQArb_R,    rst->MemQArb );
3208         store32_ci( MemRWArb_R,   rst->MemRWArb );
3209
3210         #ifdef U4_INFO
3211         printf( "\b\b\bOK\r\n" );
3212         #endif
3213
3214         /*
3215          * start up clocks & wait for pll2 to stabilize
3216          */
3217         #ifdef U4_INFO
3218         printf( "  [start DDR clock :          ]" );
3219         #endif
3220
3221         store32_ci( MemModeCntl_R, IBIT(0) | IBIT(8) );
3222         dly( 50000000 );
3223
3224         #ifdef U4_INFO
3225         printf( "\b\b\bOK\r\n" );
3226
3227         #endif
3228
3229         /*
3230          * memory initialization sequence
3231          */
3232         #ifdef U4_INFO
3233         printf( "  [memory init     :          ]" );
3234         #endif
3235         u4_MemInitSequence( tRP, tWR, tRFC, CL, tCK, TD );
3236         #ifdef U4_INFO
3237         printf( "\b\b\bOK\r\n" );
3238         #endif
3239
3240         /*
3241          * start ECC before auto calibration to enable ECC bytelane
3242          */
3243         store32_ci( MCCR_R, IBIT(0) );
3244         dly( 15000000 );
3245
3246         /*
3247          * start up auto calibration
3248          */
3249         #ifdef U4_INFO
3250         printf( "  [auto calibration:          ]\b" );
3251         #endif
3252
3253         /*
3254          * start auto calibration
3255         */
3256         l_acerr_i32 = 0;
3257         do {
3258                 progbar();
3259
3260                 l_ac_i32 = u4_auto_calib( &l_ac_st );
3261
3262                 if( l_ac_i32 != 0 ) {
3263                         l_acerr_i32++;
3264                 }
3265
3266                 dly( 15000000 );
3267         } while( ( l_ac_i32    != 0             ) &&
3268                  ( l_acerr_i32 <= MAX_ACERR     ) );
3269
3270         if( l_acerr_i32 > MAX_ACERR ) {
3271                 #ifdef U4_INFO
3272                 printf( "\b\b\bERR\r\n" );
3273                 #endif
3274                 return RET_ERR;
3275         }
3276
3277         /*
3278          * insert auto calibration results
3279          */
3280         store32_ci( MemBusCnfg_R,        l_ac_st.m_MemBusCnfg_u32 );
3281         store32_ci( MemBusCnfg2_R,       l_ac_st.m_MemBusCnfg2_u32 );
3282         store32_ci( RstLdEnVerniersC0_R, l_ac_st.m_RstLdEnVerniers_pu32[0] );
3283         store32_ci( RstLdEnVerniersC1_R, l_ac_st.m_RstLdEnVerniers_pu32[1] );
3284         store32_ci( RstLdEnVerniersC2_R, l_ac_st.m_RstLdEnVerniers_pu32[2] );
3285         store32_ci( RstLdEnVerniersC3_R, l_ac_st.m_RstLdEnVerniers_pu32[3] );
3286
3287         /*
3288          * insert final timing value into MemRWArb
3289          */
3290         l_MemRWArb_u32  = ( ( l_ac_st.m_MemBusCnfg_u32 >> 28 /*RdMacDel*/) + 1 );
3291         l_MemRWArb_u32 *= 10;   // needed for rounding
3292         l_MemRWArb_u32 /= 2;    // due to spec
3293         l_MemRWArb_u32 += 9;    // round up
3294         l_MemRWArb_u32 /= 10;   // value is rounded now
3295         l_MemRWArb_u32  = l_MemRWArb_u32 + 6 - WL - TD;
3296         l_MemRWArb_u32 |= rst->MemRWArb;
3297         store32_ci( MemRWArb_R, l_MemRWArb_u32 );
3298
3299         progbar();
3300         dly( 15000000 );
3301
3302         /*
3303          * do initial scrubbing
3304          */
3305         *f_ecc_pt = u4_InitialScrub();
3306
3307         switch( f_ecc_pt->m_err_i32 ) {
3308                 case  RET_OK: {
3309                         #ifdef U4_INFO
3310                         printf( "\b\bOK\r\n" );
3311                         #endif
3312                         break;
3313                 }
3314
3315                 case RET_ACERR_CE: {
3316                         #ifdef U4_INFO
3317                         printf( "\b\b\b\bWEAK][correctable errors during scrub (%u)]\r\n",
3318                                 f_ecc_pt->m_cecnt_u32 );
3319                         #endif
3320                         break;
3321                 }
3322
3323                 case RET_ACERR_UEWT:
3324                 case RET_ACERR_UE: {
3325                         #ifdef U4_INFO
3326                         printf( "\b\b\bERR][uncorrectable errors during scrub (%u)]\r\n",
3327                                 f_ecc_pt->m_uecnt_u32 );
3328                         #endif
3329                         return RET_ACERR_UE;
3330                 }
3331
3332         }
3333
3334         /*
3335          * start continuous background scrub
3336          */
3337         #ifdef U4_INFO
3338         printf( "  [background scrub:          ]" );
3339         #endif
3340
3341         u4_Scrub( BACKGROUND_SCRUB, 0, NULL );
3342
3343         #ifdef U4_INFO
3344         printf( "\b\b\bOK\r\n" );
3345         #endif
3346
3347         /*
3348          * finally clear API Exception register
3349          * (read to clear)
3350          */
3351         load32_ci( APIExcp_R );
3352
3353         return RET_OK;
3354 }
3355
3356 #undef RND
3357
3358 #if 0
3359 void
3360 u4_memtest(uint8_t argCnt, char *pArgs[], uint64_t flags)
3361 {
3362         #define TEND                    99
3363         #define TCHK                    100
3364         static const uint64_t _2GB   = (uint64_t) 0x80000000;
3365         static const uint64_t _start = (uint64_t) 0x08000000;   // 128Mb
3366         static const uint64_t _bsize = (uint64_t) 0x08000000;   // 128MB
3367         static const uint64_t _line  = (uint64_t) 128;
3368         static const uint64_t _256MB = (uint64_t) 0x10000000;
3369
3370         static const uint64_t PATTERN[] = {
3371                 0x9090909090909090, 0x0020002000200020,
3372                 0x0c0c0c0c0c0c0c0c, 0x8080808080808080,
3373                 0x1004010004001041, 0x0000000000000000
3374         };
3375
3376         uint64_t mend      = (uint64_t) 0x200000000;//m_memsize_u64;
3377         uint64_t numblocks = ( mend - _start ) / _bsize;        // 128Mb blocks
3378         uint64_t numlines  = _bsize / _line;
3379         uint64_t tstate    = 0;
3380         uint64_t tlast     = 0;
3381         uint64_t pidx      = 0;
3382         uint64_t rotr      = 0;
3383         uint64_t rotl      = 0;
3384         uint64_t block;
3385         uint64_t line;
3386         uint64_t addr;
3387         uint64_t i;
3388         uint64_t check = 0;
3389         uint64_t dcnt;
3390         uint64_t uerr = 0;
3391         uint64_t cerr = 0;
3392         uint64_t merr = 0;
3393         char     c;
3394
3395         printf( "\n\nU4 memory test" );
3396         printf( "\n--------------" );
3397
3398         /*
3399          * mask out UEC & CEC
3400          */
3401         or32_ci( MCCR_R, IBIT(6) | IBIT(7) );
3402
3403         while( PATTERN[pidx] ) {
3404
3405                 switch( tstate )
3406                 {
3407                 case 0: {
3408                         printf( "\npattern fill 0x%08X%08X: ", (uint32_t) (PATTERN[pidx] >> 32), (uint32_t) PATTERN[pidx] );
3409
3410                         /*
3411                          * first switch lines, then blocks. This way the CPU
3412                          * is not able to cache data
3413                          */
3414                         for( line = 0, dcnt = 0; line < numlines; line++ ) {
3415
3416                                 for( block = 0; block < numblocks; block++ ) {
3417
3418                                         for( i = 0; i < _line; i += 8 ) {
3419                                                 addr =  _start +
3420                                                         ( block * _bsize ) +
3421                                                         ( line * _line )   +
3422                                                         i;
3423
3424                                                 if( addr >= _2GB ) {
3425                                                         addr += _2GB;
3426                                                 }
3427
3428                                                 *( (uint64_t *) addr ) = PATTERN[pidx];
3429
3430                                                 /*
3431                                                  * print out a dot every 256Mb
3432                                                  */
3433                                                 dcnt += 8;
3434                                                 if( dcnt == _256MB ) {
3435                                                         dcnt = 0;
3436                                                         printf( "*" );
3437
3438                                                         if( io_getchar( &c ) ) {
3439                                                                 goto mtend;
3440                                                         }
3441
3442                                                 }
3443
3444                                         }
3445
3446                                 }
3447
3448                         }
3449
3450                         check  = PATTERN[pidx];
3451                         tlast  = 0;
3452                         tstate = TCHK;
3453                 }       break;
3454
3455                 case 1: {
3456                         uint64_t one;
3457
3458                         /*
3459                          * new check pattern
3460
3461                          */
3462                         one     = ( ( check & 0x1 ) != 0 );
3463                         check >>= 1;
3464                         if( one ) {
3465                                 check |= 0x8000000000000000;
3466                         }
3467
3468                         printf( "\nrotate right 0x%08X%08X: ", (uint32_t) (check >> 32), (uint32_t) check );
3469
3470                         /*
3471                          * first switch lines, then blocks. This way the CPU
3472                          * is not able to cache data
3473                          */
3474                         for( line = 0, dcnt = 0; line < numlines; line++ ) {
3475
3476                                 for( block = 0; block < numblocks; block++ ) {
3477
3478                                         for( i = 0; i < _line; i += 8 ) {
3479                                                 addr =  _start +
3480                                                         ( block * _bsize ) +
3481                                                         ( line * _line )   +
3482                                                         i;
3483
3484                                                 if( addr >= _2GB ) {
3485                                                         addr += _2GB;
3486                                                 }
3487
3488                                                 *( (uint64_t *) addr ) >>= 1;
3489
3490                                                 if( one ) {
3491                                                         *( (uint64_t *) addr ) |=
3492                                                                 (uint64_t) 0x8000000000000000;
3493                                                 }
3494
3495                                                 /*
3496                                                  * print out a dot every 256Mb
3497                                                  */
3498                                                 dcnt += 8;
3499                                                 if( dcnt == _256MB ) {
3500                                                         dcnt = 0;
3501                                                         printf( "*" );
3502
3503                                                         if( io_getchar( &c ) ) {
3504                                                                 goto mtend;
3505                                                         }
3506
3507                                                 }
3508
3509                                         }
3510
3511                                 }
3512
3513                         }
3514
3515                         tlast  = 1;
3516                         tstate = TCHK;
3517                 }       break;
3518
3519                 case 2: {
3520
3521                         if( rotr < 6 ) {
3522                                 rotr++;
3523                                 tstate = 1;
3524                         } else {
3525                                 rotr   = 0;
3526                                 tstate = 3;
3527                         }
3528
3529                 }       break;
3530
3531                 case 3: {
3532                         /*
3533                          * new check pattern
3534                          */
3535                         check ^= (uint64_t) ~0;
3536
3537                         printf( "\ninverting    0x%08X%08X: ", (uint32_t) (check >> 32), (uint32_t) check );
3538
3539                         /*
3540                          * first switch lines, then blocks. This way the CPU
3541                          * is not able to cache data
3542                          */
3543                         for( line = 0, dcnt = 0; line < numlines; line++ ) {
3544
3545                                 for( block = 0; block < numblocks; block++ ) {
3546
3547                                         for( i = 0; i < _line; i += 8 ) {
3548                                                 addr =  _start +
3549                                                         ( block * _bsize ) +
3550                                                         ( line * _line )   +
3551                                                         i;
3552
3553                                                 if( addr >= _2GB ) {
3554                                                         addr += _2GB;
3555                                                 }
3556
3557                                                 *( (uint64_t *) addr ) ^= (uint64_t) ~0;
3558
3559                                                 /*
3560                                                  * print out a dot every 256Mb
3561                                                  */
3562                                                 dcnt += 8;
3563                                                 if( dcnt == _256MB ) {
3564                                                         dcnt = 0;
3565                                                         printf( "*" );
3566
3567                                                         if( io_getchar( &c ) ) {
3568                                                                 goto mtend;
3569                                                         }
3570
3571                                                 }
3572
3573                                         }
3574
3575                                 }
3576
3577                         }
3578
3579                         tlast  = 3;
3580                         tstate = TCHK;
3581                 }       break;
3582
3583                 case 4: {
3584                         uint64_t one;
3585
3586                         /*
3587                          * new check pattern
3588                          */
3589                         one     = ( ( check & 0x8000000000000000 ) != 0 );
3590                         check <<= 1;
3591                         if( one ) {
3592                                 check |= 0x1;
3593                         }
3594
3595                         printf( "\nrotate left  0x%08X%08X: ", (uint32_t) (check >> 32), (uint32_t) check );
3596
3597                         /*
3598                          * first switch lines, then blocks. This way the CPU
3599                          * is not able to cache data
3600                          */
3601                         for( line = 0, dcnt = 0; line < numlines; line++ ) {
3602
3603                                 for( block = 0; block < numblocks; block++ ) {
3604
3605                                         for( i = 0; i < _line; i += 8 ) {
3606                                                 addr =  _start +
3607                                                         ( block * _bsize ) +
3608                                                         ( line * _line )   +
3609                                                         i;
3610
3611                                                 if( addr >= _2GB ) {
3612                                                         addr += _2GB;
3613                                                 }
3614
3615                                                 *( (uint64_t *) addr ) <<= 1;
3616
3617                                                 if( one ) {
3618                                                         *( (uint64_t *) addr ) |=
3619                                                                 (uint64_t) 0x1;
3620                                                 }
3621
3622                                                 /*
3623                                                  * print out a dot every 256Mb
3624                                                  */
3625                                                 dcnt += 8;
3626                                                 if( dcnt == _256MB ) {
3627                                                         dcnt = 0;
3628                                                         printf( "*" );
3629
3630                                                         if( io_getchar( &c ) ) {
3631                                                                 goto mtend;
3632                                                         }
3633
3634                                                 }
3635
3636                                         }
3637
3638                                 }
3639
3640                         }
3641
3642                         tlast  = 4;
3643                         tstate = TCHK;
3644                 }       break;
3645
3646                 case 5: {
3647
3648                         if( rotl < 6 ) {
3649                                 rotl++;
3650                                 tstate = 4;
3651                         } else {
3652                                 rotl   = 0;
3653                                 tstate = 6;
3654                         }
3655
3656                 }       break;
3657
3658                 case 6: {
3659                         /*
3660                          * new check pattern
3661                          */
3662                         check *= ~check;
3663                         printf( "\nmultiply     0x%08X%08X: ", (uint32_t) (check >> 32), (uint32_t) check );
3664
3665                         /*
3666                          * first switch lines, then blocks. This way the CPU
3667                          * is not able to cache data
3668                          */
3669                         for( line = 0, dcnt = 0; line < numlines; line++ ) {
3670
3671                                 for( block = 0; block < numblocks; block++ ) {
3672
3673                                         for( i = 0; i < _line; i += 8 ) {
3674                                                 addr =  _start +
3675                                                         ( block * _bsize ) +
3676                                                         ( line * _line )   +
3677                                                         i;
3678
3679                                                 if( addr >= _2GB ) {
3680                                                         addr += _2GB;
3681                                                 }
3682
3683                                                 *( (uint64_t *) addr ) *= ~( *( (uint64_t *) addr ) );
3684
3685                                                 /*
3686                                                  * print out a dot every 256Mb
3687                                                  */
3688                                                 dcnt += 8;
3689                                                 if( dcnt == _256MB ) {
3690                                                         dcnt = 0;
3691                                                         printf( "*" );
3692
3693                                                         if( io_getchar( &c ) ) {
3694                                                                 goto mtend;
3695                                                         }
3696
3697                                                 }
3698
3699                                         }
3700
3701                                 }
3702
3703                         }
3704
3705                         tlast  = TEND - 1;
3706                         tstate = TCHK;
3707                 }       break;
3708
3709                 case TEND: {
3710                         pidx++;
3711                         tstate = 0;
3712                 }       break;
3713
3714                 case TCHK: {
3715                         uint64_t err;
3716                         /*
3717                          * check data
3718                          */
3719                         printf( "\nchecking                       : " );
3720
3721                         for( line = 0, dcnt = 0; line < numlines; line++ ) {
3722
3723                                 for( block = 0; block < numblocks; block++ ) {
3724
3725                                         for( i = 0; i < _line; i += 8 ) {
3726                                                 addr =  _start +
3727                                                         ( block * _bsize ) +
3728                                                         ( line * _line )   +
3729                                                         i;
3730
3731                                                 if( addr >= _2GB ) {
3732                                                         addr += _2GB;
3733                                                 }
3734
3735                                                 err = ( *( (uint64_t *) addr ) != check );
3736
3737                                                 if( err ) {
3738                                                         merr++;
3739                                                 }
3740
3741                                                 /*
3742                                                  * print out a dot every 256Mb
3743                                                  */
3744                                                 dcnt += 8;
3745                                                 if( dcnt == _256MB ) {
3746                                                         dcnt = 0;
3747
3748                                                         if( err ) {
3749                                                                 printf( "X" );
3750                                                         } else {
3751                                                                 printf( "*" );
3752                                                         }
3753
3754                                                         if( io_getchar( &c ) ) {
3755                                                                 goto mtend;
3756                                                         }
3757
3758                                                 }
3759
3760                                         }
3761
3762                                 }
3763
3764                         }
3765
3766                         err   = (uint64_t) load32_ci( MEAR1_R );
3767                         uerr += ( err >> 24 ) & (uint64_t) 0xff;
3768                         cerr += ( err >> 16 ) & (uint64_t) 0xff;
3769
3770                         printf( " (UE: %02llX, CE: %02llX)", ( err >> 24 ) & (uint64_t) 0xff, ( err >> 16 ) & (uint64_t) 0xff );
3771
3772                         tstate = tlast + 1;
3773                         tlast  = TCHK;
3774                 }       break;
3775
3776                 }
3777
3778         }
3779
3780 mtend:
3781         printf( "\n\nmemory test results" );
3782         printf( "\n-------------------" );
3783         printf( "\nuncorrectable errors: %u", (uint32_t) uerr );
3784         printf( "\ncorrectable errors  : %u", (uint32_t) cerr );
3785         printf( "\nread/write errors   : %u\n", (uint32_t) merr );
3786
3787         and32_ci( MCCR_R, ~( IBIT(6) | IBIT(7) ) );
3788 }
3789 #endif
3790
3791 #if 0
3792 void
3793 u4_dump(uint8_t argCnt, char *pArgs[], uint64_t flags)
3794 {
3795         printf( "\r\n*** u4 register dump ***\r\n\n" );
3796         printf( "register      (offset): value\r\n" );
3797         printf( "----------------------------------\r\n" );
3798         printf( "Clock Control (0x%04X): 0x%08X\r\n", (uint16_t) ClkCntl_R, load32_ci( ClkCntl_R ) );
3799         printf( "PLL2 Control  (0x%04X): 0x%08X\r\n", (uint16_t) PLL2Cntl_R, load32_ci( PLL2Cntl_R ) );
3800         printf( "MemModeCntl   (0x%04X): 0x%08X\r\n", (uint16_t) MemModeCntl_R, load32_ci( MemModeCntl_R ) );
3801         printf( "RASTimer0     (0x%04X): 0x%08X\r\n", (uint16_t) RASTimer0_R, load32_ci( RASTimer0_R ) );
3802         printf( "RASTimer1     (0x%04X): 0x%08X\r\n", (uint16_t) RASTimer1_R, load32_ci( RASTimer1_R ) );
3803         printf( "CASTimer0     (0x%04X): 0x%08X\r\n", (uint16_t) CASTimer0_R, load32_ci( CASTimer0_R ) );
3804         printf( "CASTimer1     (0x%04X): 0x%08X\r\n", (uint16_t) CASTimer1_R, load32_ci( CASTimer1_R ) );
3805         printf( "MemRfshCntl   (0x%04X): 0x%08X\r\n", (uint16_t) MemRfshCntl_R, load32_ci( MemRfshCntl_R ) );
3806         printf( "Dm0Cnfg       (0x%04X): 0x%08X\r\n", (uint16_t) Dm0Cnfg_R, load32_ci( Dm0Cnfg_R ) );
3807         printf( "Dm1Cnfg       (0x%04X): 0x%08X\r\n", (uint16_t) Dm1Cnfg_R, load32_ci( Dm1Cnfg_R ) );
3808         printf( "Dm2Cnfg       (0x%04X): 0x%08X\r\n", (uint16_t) Dm2Cnfg_R, load32_ci( Dm2Cnfg_R ) );
3809         printf( "Dm3Cnfg       (0x%04X): 0x%08X\r\n", (uint16_t) Dm3Cnfg_R, load32_ci( Dm3Cnfg_R ) );
3810         printf( "UsrCnfg       (0x%04X): 0x%08X\r\n", (uint16_t) UsrCnfg_R, load32_ci( UsrCnfg_R ) );
3811         printf( "MemArbWt      (0x%04X): 0x%08X\r\n", (uint16_t) MemArbWt_R, load32_ci( MemArbWt_R ) );
3812         printf( "ODTCntl       (0x%04X): 0x%08X\r\n", (uint16_t) ODTCntl_R, load32_ci( ODTCntl_R ) );
3813         printf( "IOPadCntl     (0x%04X): 0x%08X\r\n", (uint16_t) IOPadCntl_R, load32_ci( IOPadCntl_R ) );
3814         printf( "MemPhyMode    (0x%04X): 0x%08X\r\n", (uint16_t) MemPhyModeCntl_R, load32_ci( MemPhyModeCntl_R ) );
3815         printf( "OCDCalCntl    (0x%04X): 0x%08X\r\n", (uint16_t) OCDCalCntl_R, load32_ci( OCDCalCntl_R ) );
3816         printf( "OCDCalCmd     (0x%04X): 0x%08X\r\n", (uint16_t) OCDCalCmd_R, load32_ci( OCDCalCmd_R ) );
3817         printf( "CKDelayL      (0x%04X): 0x%08X\r\n", (uint16_t) CKDelayL_R, load32_ci( CKDelayL_R ) );
3818         printf( "CKDelayH      (0x%04X): 0x%08X\r\n", (uint16_t) CKDelayU_R, load32_ci( CKDelayU_R ) );
3819         printf( "MemBusCnfg    (0x%04X): 0x%08X\r\n", (uint16_t) MemBusCnfg_R, load32_ci( MemBusCnfg_R ) );
3820         printf( "MemBusCnfg2   (0x%04X): 0x%08X\r\n", (uint16_t) MemBusCnfg2_R, load32_ci( MemBusCnfg2_R ) );
3821         printf( "MemRdQCnfg    (0x%04X): 0x%08X\r\n", (uint16_t) MemRdQCnfg_R, load32_ci( MemRdQCnfg_R ) );
3822         printf( "MemWrQCnfg    (0x%04X): 0x%08X\r\n", (uint16_t) MemWrQCnfg_R, load32_ci( MemWrQCnfg_R ) );
3823         printf( "MemQArb       (0x%04X): 0x%08X\r\n", (uint16_t) MemQArb_R, load32_ci( MemQArb_R ) );
3824         printf( "MemRWArb      (0x%04X): 0x%08X\r\n", (uint16_t) MemRWArb_R, load32_ci( MemRWArb_R ) );
3825         printf( "ByteWrClkDel  (0x%04X): 0x%08X\r\n", (uint16_t) ByteWrClkDelC0B00_R, load32_ci( ByteWrClkDelC0B00_R ) );
3826         printf( "ReadStrobeDel (0x%04X): 0x%08X\r\n", (uint16_t) ReadStrobeDelC0B00_R, load32_ci( ReadStrobeDelC0B00_R ) );
3827         printf( "RstLdEnVerC0  (0x%04X): 0x%08X\r\n", (uint16_t) RstLdEnVerniersC0_R, load32_ci( RstLdEnVerniersC0_R ) );
3828         printf( "RstLdEnVerC1  (0x%04X): 0x%08X\r\n", (uint16_t) RstLdEnVerniersC1_R, load32_ci( RstLdEnVerniersC1_R ) );
3829         printf( "RstLdEnVerC2  (0x%04X): 0x%08X\r\n", (uint16_t) RstLdEnVerniersC2_R, load32_ci( RstLdEnVerniersC2_R ) );
3830         printf( "RstLdEnVerC3  (0x%04X): 0x%08X\r\n", (uint16_t) RstLdEnVerniersC3_R, load32_ci( RstLdEnVerniersC3_R ) );
3831         printf( "APIMemRdCfg   (0x%04X): 0x%08X\r\n", (uint16_t) APIMemRdCfg_R, load32_ci( APIMemRdCfg_R ) );
3832         printf( "scrub start   (0x%04X): 0x%08X\r\n", (uint16_t) MSRSR_R, load32_ci( MSRSR_R ) );
3833         printf( "scrub end     (0x%04X): 0x%08X\r\n", (uint16_t) MSRER_R, load32_ci( MSRER_R ) );
3834 }
3835 #endif
3836
3837 static int32_t
3838 u4_memBegin( eccerror_t *f_ecc_pt )
3839 {
3840         int32_t i;
3841
3842         #ifdef U4_INFO
3843         printf( "\r\n" );
3844         printf( "U4 DDR2 memory controller setup V%u.%u\r\n",
3845                 VER, SUBVER );
3846         printf( "------------------------------------\r\n" );
3847         printf( "> detected board              : " );
3848
3849         if( IS_MAUI ) {
3850                 printf( "MAUI" );
3851         } else if( IS_BIMINI ) {
3852                 printf( "BIMINI" );
3853         } else if( IS_KAUAI ) {
3854                 printf( "KAUAI" );
3855         } else {
3856                 printf( "unknown!" );
3857                 return RET_ERR;
3858         }
3859         #endif
3860
3861         do {
3862                 /*
3863                  * initialize variables
3864                  */
3865                 m_memsize_u64    = 0;
3866                 m_dcnt_u32       = 0;
3867                 m_dgrcnt_u32     = 0;
3868                 m_dclidx_u32     = 0;
3869
3870                 for( i = 0; i < NUM_SLOTS; i++ ) {
3871                         m_dptr[i] = NULL;
3872                         memset( ( void * ) &m_dimm[i], 0, sizeof( dimm_t ) );
3873                 }
3874
3875                 for( i = 0; i < MAX_DGROUPS; i++ ) {
3876                         m_dgrptr[i] = NULL;
3877                         memset( ( void * ) &m_dgroup[i], 0, sizeof( dimm_t ) );
3878                 }
3879
3880                 /*
3881                  * start configuration
3882                  */
3883                 #ifdef U4_INFO
3884                 printf( "\r\n> detected DIMM configuration : " );
3885                 #endif
3886
3887                 i = ddr2_readSPDs();
3888
3889                 if( i != RET_OK ) {
3890                         #ifdef U4_INFO
3891                         printf( "\r\n-------------------------------------------------------------" );
3892                         printf( "\r\n  switching off memory bank(s) due to SPD integrity failure" );
3893                         printf( "\r\n-------------------------------------------------------------\r\n" );
3894                         #endif
3895                 }
3896
3897         } while( i != RET_OK );
3898
3899         /*
3900          * check DIMM configuration
3901          */
3902         if( ddr2_setupDIMMcfg() != RET_OK ) {
3903                 #ifdef U4_INFO
3904                 printf( "> initialization failure.\r\n" );
3905                 #endif
3906                 return RET_ERR;
3907         }
3908
3909         /*
3910          * create DIMM groups
3911          */
3912         u4_setupDIMMgroups();
3913
3914         /*
3915          * start configuration of u4
3916          */
3917         u4_calcDIMMcnfg();
3918
3919         if( u4_calcDIMMmemmode() != RET_OK ) {
3920                 #ifdef U4_INFO
3921                 printf( "> initialization failure.\r\n" );
3922                 #endif
3923                 return RET_ERR;
3924         }
3925
3926         #ifdef U4_INFO
3927         printf( "%uMb @ %uMhz, CL %u\r\n",
3928                 (uint32_t) ( m_memsize_u64 / 0x100000 ),
3929                 m_gendimm.m_speed_pu32[m_dclidx_u32],
3930                 m_gendimm.m_clval_pu32[m_dclidx_u32] );
3931
3932         printf( "> initializing memory         :\r\n" );
3933         #endif
3934
3935         if( u4_setup_core_clock() != RET_OK ) {
3936                 #ifdef U4_INFO
3937                 printf( "> initialization failure.\r\n" );
3938                 #endif
3939                 return RET_ERR;
3940         }
3941
3942         i = u4_start( f_ecc_pt );
3943         if( i != RET_OK ) {
3944                 #ifdef U4_INFO
3945                 printf( "> initialization failure.\r\n" );
3946                 #endif
3947                 return i;
3948         }
3949
3950         #ifdef U4_INFO
3951         printf( "  [flush cache     :          ]" );
3952         #endif
3953
3954         flush_cache( 0x0, L2_CACHE_SIZE );
3955
3956         #ifdef U4_INFO
3957         printf( "\b\b\bOK\r\n" );
3958         printf( "> initialization complete.\r\n" );
3959         #endif
3960
3961 #ifdef U4_SHOW_REGS
3962         u4_dump(0,0,0);
3963 #endif
3964
3965         return RET_OK;
3966 }
3967
3968
3969 #if 0
3970 static int32_t scrubstarted = 0;
3971 void
3972 u4_scrubStart(uint8_t argCnt, char *pArgs[], uint64_t flags )
3973 {
3974         scrubstarted = 1;
3975
3976         /*
3977          * setup scrub parameters
3978          */
3979         store32_ci( MSCR_R, 0 );                        // stop scrub
3980         store32_ci( MSRSR_R, 0x0 );                     // set start
3981         store32_ci( MSRER_R, 0x1c );                    // set end
3982         store32_ci( MSPR_R, 0x0 );                      // set pattern
3983
3984         /*
3985          * clear out ECC error registers
3986          */
3987         store32_ci( MEAR0_R, 0x0 );
3988         store32_ci( MEAR1_R, 0x0 );
3989         store32_ci( MESR_R, 0x0 );
3990
3991         /*
3992          * Setup Scrub Type
3993          */
3994         store32_ci( MSCR_R, IBIT(1) );
3995         printf( "\r\nscrub started\r\n" );
3996 }
3997 #endif
3998
3999 #if 0
4000 void
4001 u4_scrubEnd(uint8_t argCnt, char *pArgs[], uint64_t flags )
4002 {
4003         store32_ci( MSCR_R, 0 );                        // stop scrub
4004         scrubstarted = 0;
4005         printf( "\r\nscrub stopped\r\n" );
4006 }
4007 #endif
4008
4009 #if 0
/*
 * Write a fixed test pattern to the addresses 0x0 .. 0x1ff using
 * store32_ci(): 16 bytes of 0xffffffff words, then 16 bytes of
 * 0x00000000 words, alternating up to the end of the range.
 *
 * The three arguments are accepted but never read by this function.
 */
void
u4_memwr(uint8_t argCnt, char *pArgs[], uint64_t flags )
{
        uint32_t l_base_u32;
        uint32_t l_off_u32;
        uint32_t l_pat_u32 = 0;

        /*
         * outer loop: one step per 16 byte group
         */
        for( l_base_u32 = 0; l_base_u32 < 0x200; l_base_u32 += 0x10 ) {
                /*
                 * the pattern flips at the start of every group
                 */
                l_pat_u32 = ~l_pat_u32;

                /*
                 * inner loop: the four 32 bit words of the group
                 */
                for( l_off_u32 = 0; l_off_u32 < 0x10; l_off_u32 += 4 ) {
                        store32_ci( l_base_u32 + l_off_u32, l_pat_u32 );
                }

        }

}
4026 #endif
4027
4028 void
4029 u4memInit()
4030 {
4031         static uint32_t l_isInit_u32 = 0;
4032         eccerror_t      l_ecc_t;
4033         int32_t         ret;
4034
4035         /*
4036          * do not initialize memory more than once
4037          */
4038         if( l_isInit_u32 ) {
4039                 #ifdef U4_INFO
4040                 printf( "\r\n\nmemory already initialized\r\n" );
4041                 #endif
4042                 return;
4043         } else {
4044                 l_isInit_u32 = 1;
4045         }
4046
4047         /*
4048          * enable all DIMM banks on first run
4049          */
4050         m_bankoff_u32 = 0;
4051
4052         do {
4053                 ret = u4_memBegin( &l_ecc_t );
4054
4055                 if( ret < RET_ERR ) {
4056                         uint32_t l_bank_u32 = l_ecc_t.m_rank_u32 / 2;
4057                         printf( "\r\n-----------------------------------------------------" );
4058                         printf( "\r\n  switching off memory bank %u due to memory failure", l_bank_u32 );
4059                         printf( "\r\n-----------------------------------------------------" );
4060                         m_bankoff_u32 |= ( 1 << l_bank_u32 );
4061                 }
4062
4063         } while( ret < RET_ERR );
4064
4065 }