Add qemu 2.4.0
[kvmfornfv.git] / qemu / target-ppc / int_helper.c
1 /*
2  *  PowerPC integer and vector emulation helpers for QEMU.
3  *
4  *  Copyright (c) 2003-2007 Jocelyn Mayer
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "cpu.h"
20 #include "qemu/host-utils.h"
21 #include "exec/helper-proto.h"
22 #include "crypto/aes.h"
23
24 #include "helper_regs.h"
25 /*****************************************************************************/
26 /* Fixed point operations helpers */
27
28 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
29                            uint32_t oe)
30 {
31     uint64_t rt = 0;
32     int overflow = 0;
33
34     uint64_t dividend = (uint64_t)ra << 32;
35     uint64_t divisor = (uint32_t)rb;
36
37     if (unlikely(divisor == 0)) {
38         overflow = 1;
39     } else {
40         rt = dividend / divisor;
41         overflow = rt > UINT32_MAX;
42     }
43
44     if (unlikely(overflow)) {
45         rt = 0; /* Undefined */
46     }
47
48     if (oe) {
49         if (unlikely(overflow)) {
50             env->so = env->ov = 1;
51         } else {
52             env->ov = 0;
53         }
54     }
55
56     return (target_ulong)rt;
57 }
58
59 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
60                           uint32_t oe)
61 {
62     int64_t rt = 0;
63     int overflow = 0;
64
65     int64_t dividend = (int64_t)ra << 32;
66     int64_t divisor = (int64_t)((int32_t)rb);
67
68     if (unlikely((divisor == 0) ||
69                  ((divisor == -1ull) && (dividend == INT64_MIN)))) {
70         overflow = 1;
71     } else {
72         rt = dividend / divisor;
73         overflow = rt != (int32_t)rt;
74     }
75
76     if (unlikely(overflow)) {
77         rt = 0; /* Undefined */
78     }
79
80     if (oe) {
81         if (unlikely(overflow)) {
82             env->so = env->ov = 1;
83         } else {
84             env->ov = 0;
85         }
86     }
87
88     return (target_ulong)rt;
89 }
90
91 #if defined(TARGET_PPC64)
92
93 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
94 {
95     uint64_t rt = 0;
96     int overflow = 0;
97
98     overflow = divu128(&rt, &ra, rb);
99
100     if (unlikely(overflow)) {
101         rt = 0; /* Undefined */
102     }
103
104     if (oe) {
105         if (unlikely(overflow)) {
106             env->so = env->ov = 1;
107         } else {
108             env->ov = 0;
109         }
110     }
111
112     return rt;
113 }
114
115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
116 {
117     int64_t rt = 0;
118     int64_t ra = (int64_t)rau;
119     int64_t rb = (int64_t)rbu;
120     int overflow = divs128(&rt, &ra, rb);
121
122     if (unlikely(overflow)) {
123         rt = 0; /* Undefined */
124     }
125
126     if (oe) {
127
128         if (unlikely(overflow)) {
129             env->so = env->ov = 1;
130         } else {
131             env->ov = 0;
132         }
133     }
134
135     return rt;
136 }
137
138 #endif
139
140
141 target_ulong helper_cntlzw(target_ulong t)
142 {
143     return clz32(t);
144 }
145
146 #if defined(TARGET_PPC64)
147 target_ulong helper_cntlzd(target_ulong t)
148 {
149     return clz64(t);
150 }
151 #endif
152
153 #if defined(TARGET_PPC64)
154
/*
 * Bit permute doubleword helper.
 *
 * Each of the eight bytes of rs is a bit index into rb, where index 0
 * designates the most significant bit of rb.  For byte number n (counted
 * from the least significant byte of rs), bit n of the result is set when
 * the index is below 64 and the selected bit of rb is 1.  Indices of 64
 * or more contribute a 0 bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t result = 0;
    int byte;

    for (byte = 0; byte < 8; byte++, rs >>= 8) {
        const int bit = rs & 0xFF;

        if (bit >= 64) {
            continue;
        }
        if ((rb >> (63 - bit)) & 1) {
            result |= 1 << byte;
        }
    }
    return result;
}
170
171 #endif
172
173 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
174 {
175     target_ulong mask = 0xff;
176     target_ulong ra = 0;
177     int i;
178
179     for (i = 0; i < sizeof(target_ulong); i++) {
180         if ((rs & mask) == (rb & mask)) {
181             ra |= mask;
182         }
183         mask <<= 8;
184     }
185     return ra;
186 }
187
188 /* shift right arithmetic helper */
189 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
190                          target_ulong shift)
191 {
192     int32_t ret;
193
194     if (likely(!(shift & 0x20))) {
195         if (likely((uint32_t)shift != 0)) {
196             shift &= 0x1f;
197             ret = (int32_t)value >> shift;
198             if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
199                 env->ca = 0;
200             } else {
201                 env->ca = 1;
202             }
203         } else {
204             ret = (int32_t)value;
205             env->ca = 0;
206         }
207     } else {
208         ret = (int32_t)value >> 31;
209         env->ca = (ret != 0);
210     }
211     return (target_long)ret;
212 }
213
214 #if defined(TARGET_PPC64)
215 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
216                          target_ulong shift)
217 {
218     int64_t ret;
219
220     if (likely(!(shift & 0x40))) {
221         if (likely((uint64_t)shift != 0)) {
222             shift &= 0x3f;
223             ret = (int64_t)value >> shift;
224             if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
225                 env->ca = 0;
226             } else {
227                 env->ca = 1;
228             }
229         } else {
230             ret = (int64_t)value;
231             env->ca = 0;
232         }
233     } else {
234         ret = (int64_t)value >> 63;
235         env->ca = (ret != 0);
236     }
237     return ret;
238 }
239 #endif
240
241 #if defined(TARGET_PPC64)
242 target_ulong helper_popcntb(target_ulong val)
243 {
244     val = (val & 0x5555555555555555ULL) + ((val >>  1) &
245                                            0x5555555555555555ULL);
246     val = (val & 0x3333333333333333ULL) + ((val >>  2) &
247                                            0x3333333333333333ULL);
248     val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
249                                            0x0f0f0f0f0f0f0f0fULL);
250     return val;
251 }
252
253 target_ulong helper_popcntw(target_ulong val)
254 {
255     val = (val & 0x5555555555555555ULL) + ((val >>  1) &
256                                            0x5555555555555555ULL);
257     val = (val & 0x3333333333333333ULL) + ((val >>  2) &
258                                            0x3333333333333333ULL);
259     val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
260                                            0x0f0f0f0f0f0f0f0fULL);
261     val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
262                                            0x00ff00ff00ff00ffULL);
263     val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
264                                            0x0000ffff0000ffffULL);
265     return val;
266 }
267
268 target_ulong helper_popcntd(target_ulong val)
269 {
270     return ctpop64(val);
271 }
272 #else
273 target_ulong helper_popcntb(target_ulong val)
274 {
275     val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
276     val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
277     val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
278     return val;
279 }
280
281 target_ulong helper_popcntw(target_ulong val)
282 {
283     val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
284     val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
285     val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
286     val = (val & 0x00ff00ff) + ((val >>  8) & 0x00ff00ff);
287     val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
288     return val;
289 }
290 #endif
291
292 /*****************************************************************************/
293 /* PowerPC 601 specific instructions (POWER bridge) */
294 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
295 {
296     uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
297
298     if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
299         (int32_t)arg2 == 0) {
300         env->spr[SPR_MQ] = 0;
301         return INT32_MIN;
302     } else {
303         env->spr[SPR_MQ] = tmp % arg2;
304         return  tmp / (int32_t)arg2;
305     }
306 }
307
308 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
309                          target_ulong arg2)
310 {
311     uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
312
313     if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
314         (int32_t)arg2 == 0) {
315         env->so = env->ov = 1;
316         env->spr[SPR_MQ] = 0;
317         return INT32_MIN;
318     } else {
319         env->spr[SPR_MQ] = tmp % arg2;
320         tmp /= (int32_t)arg2;
321         if ((int32_t)tmp != tmp) {
322             env->so = env->ov = 1;
323         } else {
324             env->ov = 0;
325         }
326         return tmp;
327     }
328 }
329
330 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
331                          target_ulong arg2)
332 {
333     if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
334         (int32_t)arg2 == 0) {
335         env->spr[SPR_MQ] = 0;
336         return INT32_MIN;
337     } else {
338         env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
339         return (int32_t)arg1 / (int32_t)arg2;
340     }
341 }
342
343 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
344                           target_ulong arg2)
345 {
346     if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
347         (int32_t)arg2 == 0) {
348         env->so = env->ov = 1;
349         env->spr[SPR_MQ] = 0;
350         return INT32_MIN;
351     } else {
352         env->ov = 0;
353         env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
354         return (int32_t)arg1 / (int32_t)arg2;
355     }
356 }
357
358 /*****************************************************************************/
359 /* 602 specific instructions */
360 /* mfrom is the most crazy instruction ever seen, imho ! */
361 /* Real implementation uses a ROM table. Do the same */
362 /* Extremely decomposed:
363  *                      -arg / 256
364  * return 256 * log10(10           + 1.0) + 0.5
365  */
366 #if !defined(CONFIG_USER_ONLY)
367 target_ulong helper_602_mfrom(target_ulong arg)
368 {
369     if (likely(arg < 602)) {
370 #include "mfrom_table.c"
371         return mfrom_ROM_table[arg];
372     } else {
373         return 0;
374     }
375 }
376 #endif
377
378 /*****************************************************************************/
379 /* Altivec extension helpers */
/*
 * Host-endianness dependent accessors for vector registers.
 *
 * HI_IDX/LO_IDX select the high/low half of a two-element pair,
 * AVRB/AVRW index bytes/words, and VECTOR_FOR_INORDER_I walks the
 * elements of r->element from index 0 upwards on big-endian hosts and
 * from the last index downwards on little-endian hosts.
 */
#ifdef HOST_WORDS_BIGENDIAN

#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)

#else /* little-endian host */

#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)

#endif
399
/*
 * Saturating conversion helpers.
 *
 * Each generated cvt<from><to>() narrows x to to_type, clamping it to
 * [min, max].  Whenever clamping happens, *sat is set to 1; it is never
 * cleared here, so callers can accumulate saturation across elements.
 * SATCVTU is the variant for unsigned sources, which only need the upper
 * bound checked.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
442
443 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
444 {
445     int i, j = (sh & 0xf);
446
447     VECTOR_FOR_INORDER_I(i, u8) {
448         r->u8[i] = j++;
449     }
450 }
451
452 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
453 {
454     int i, j = 0x10 - (sh & 0xf);
455
456     VECTOR_FOR_INORDER_I(i, u8) {
457         r->u8[i] = j++;
458     }
459 }
460
461 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
462 {
463 #if defined(HOST_WORDS_BIGENDIAN)
464     env->vscr = r->u32[3];
465 #else
466     env->vscr = r->u32[0];
467 #endif
468     set_flush_to_zero(vscr_nj, &env->vec_status);
469 }
470
471 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
472 {
473     int i;
474
475     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
476         r->u32[i] = ~a->u32[i] < b->u32[i];
477     }
478 }
479
480 #define VARITH_DO(name, op, element)                                    \
481     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
482     {                                                                   \
483         int i;                                                          \
484                                                                         \
485         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
486             r->element[i] = a->element[i] op b->element[i];             \
487         }                                                               \
488     }
489 #define VARITH(suffix, element)                 \
490     VARITH_DO(add##suffix, +, element)          \
491     VARITH_DO(sub##suffix, -, element)
492 VARITH(ubm, u8)
493 VARITH(uhm, u16)
494 VARITH(uwm, u32)
495 VARITH(udm, u64)
496 VARITH_DO(muluwm, *, u32)
497 #undef VARITH_DO
498 #undef VARITH
499
500 #define VARITHFP(suffix, func)                                          \
501     void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
502                           ppc_avr_t *b)                                 \
503     {                                                                   \
504         int i;                                                          \
505                                                                         \
506         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
507             r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
508         }                                                               \
509     }
510 VARITHFP(addfp, float32_add)
511 VARITHFP(subfp, float32_sub)
512 VARITHFP(minfp, float32_min)
513 VARITHFP(maxfp, float32_max)
514 #undef VARITHFP
515
516 #define VARITHFPFMA(suffix, type)                                       \
517     void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
518                            ppc_avr_t *b, ppc_avr_t *c)                  \
519     {                                                                   \
520         int i;                                                          \
521         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
522             r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
523                                      type, &env->vec_status);           \
524         }                                                               \
525     }
526 VARITHFPFMA(maddfp, 0);
527 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
528 #undef VARITHFPFMA
529
530 #define VARITHSAT_CASE(type, op, cvt, element)                          \
531     {                                                                   \
532         type result = (type)a->element[i] op (type)b->element[i];       \
533         r->element[i] = cvt(result, &sat);                              \
534     }
535
536 #define VARITHSAT_DO(name, op, optype, cvt, element)                    \
537     void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
538                         ppc_avr_t *b)                                   \
539     {                                                                   \
540         int sat = 0;                                                    \
541         int i;                                                          \
542                                                                         \
543         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
544             switch (sizeof(r->element[0])) {                            \
545             case 1:                                                     \
546                 VARITHSAT_CASE(optype, op, cvt, element);               \
547                 break;                                                  \
548             case 2:                                                     \
549                 VARITHSAT_CASE(optype, op, cvt, element);               \
550                 break;                                                  \
551             case 4:                                                     \
552                 VARITHSAT_CASE(optype, op, cvt, element);               \
553                 break;                                                  \
554             }                                                           \
555         }                                                               \
556         if (sat) {                                                      \
557             env->vscr |= (1 << VSCR_SAT);                               \
558         }                                                               \
559     }
560 #define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
561     VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
562     VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
563 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
564     VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
565     VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
566 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
567 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
568 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
569 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
570 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
571 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
572 #undef VARITHSAT_CASE
573 #undef VARITHSAT_DO
574 #undef VARITHSAT_SIGNED
575 #undef VARITHSAT_UNSIGNED
576
577 #define VAVG_DO(name, element, etype)                                   \
578     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
579     {                                                                   \
580         int i;                                                          \
581                                                                         \
582         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
583             etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
584             r->element[i] = x >> 1;                                     \
585         }                                                               \
586     }
587
588 #define VAVG(type, signed_element, signed_type, unsigned_element,       \
589              unsigned_type)                                             \
590     VAVG_DO(avgs##type, signed_element, signed_type)                    \
591     VAVG_DO(avgu##type, unsigned_element, unsigned_type)
592 VAVG(b, s8, int16_t, u8, uint16_t)
593 VAVG(h, s16, int32_t, u16, uint32_t)
594 VAVG(w, s32, int64_t, u32, uint64_t)
595 #undef VAVG_DO
596 #undef VAVG
597
598 #define VCF(suffix, cvt, element)                                       \
599     void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
600                             ppc_avr_t *b, uint32_t uim)                 \
601     {                                                                   \
602         int i;                                                          \
603                                                                         \
604         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
605             float32 t = cvt(b->element[i], &env->vec_status);           \
606             r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
607         }                                                               \
608     }
609 VCF(ux, uint32_to_float32, u32)
610 VCF(sx, int32_to_float32, s32)
611 #undef VCF
612
613 #define VCMP_DO(suffix, compare, element, record)                       \
614     void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
615                              ppc_avr_t *a, ppc_avr_t *b)                \
616     {                                                                   \
617         uint64_t ones = (uint64_t)-1;                                   \
618         uint64_t all = ones;                                            \
619         uint64_t none = 0;                                              \
620         int i;                                                          \
621                                                                         \
622         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
623             uint64_t result = (a->element[i] compare b->element[i] ?    \
624                                ones : 0x0);                             \
625             switch (sizeof(a->element[0])) {                            \
626             case 8:                                                     \
627                 r->u64[i] = result;                                     \
628                 break;                                                  \
629             case 4:                                                     \
630                 r->u32[i] = result;                                     \
631                 break;                                                  \
632             case 2:                                                     \
633                 r->u16[i] = result;                                     \
634                 break;                                                  \
635             case 1:                                                     \
636                 r->u8[i] = result;                                      \
637                 break;                                                  \
638             }                                                           \
639             all &= result;                                              \
640             none |= result;                                             \
641         }                                                               \
642         if (record) {                                                   \
643             env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
644         }                                                               \
645     }
646 #define VCMP(suffix, compare, element)          \
647     VCMP_DO(suffix, compare, element, 0)        \
648     VCMP_DO(suffix##_dot, compare, element, 1)
649 VCMP(equb, ==, u8)
650 VCMP(equh, ==, u16)
651 VCMP(equw, ==, u32)
652 VCMP(equd, ==, u64)
653 VCMP(gtub, >, u8)
654 VCMP(gtuh, >, u16)
655 VCMP(gtuw, >, u32)
656 VCMP(gtud, >, u64)
657 VCMP(gtsb, >, s8)
658 VCMP(gtsh, >, s16)
659 VCMP(gtsw, >, s32)
660 VCMP(gtsd, >, s64)
661 #undef VCMP_DO
662 #undef VCMP
663
664 #define VCMPFP_DO(suffix, compare, order, record)                       \
665     void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
666                              ppc_avr_t *a, ppc_avr_t *b)                \
667     {                                                                   \
668         uint32_t ones = (uint32_t)-1;                                   \
669         uint32_t all = ones;                                            \
670         uint32_t none = 0;                                              \
671         int i;                                                          \
672                                                                         \
673         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
674             uint32_t result;                                            \
675             int rel = float32_compare_quiet(a->f[i], b->f[i],           \
676                                             &env->vec_status);          \
677             if (rel == float_relation_unordered) {                      \
678                 result = 0;                                             \
679             } else if (rel compare order) {                             \
680                 result = ones;                                          \
681             } else {                                                    \
682                 result = 0;                                             \
683             }                                                           \
684             r->u32[i] = result;                                         \
685             all &= result;                                              \
686             none |= result;                                             \
687         }                                                               \
688         if (record) {                                                   \
689             env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
690         }                                                               \
691     }
692 #define VCMPFP(suffix, compare, order)          \
693     VCMPFP_DO(suffix, compare, order, 0)        \
694     VCMPFP_DO(suffix##_dot, compare, order, 1)
695 VCMPFP(eqfp, ==, float_relation_equal)
696 VCMPFP(gefp, !=, float_relation_less)
697 VCMPFP(gtfp, ==, float_relation_greater)
698 #undef VCMPFP_DO
699 #undef VCMPFP
700
701 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
702                                     ppc_avr_t *a, ppc_avr_t *b, int record)
703 {
704     int i;
705     int all_in = 0;
706
707     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
708         int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
709         if (le_rel == float_relation_unordered) {
710             r->u32[i] = 0xc0000000;
711             all_in = 1;
712         } else {
713             float32 bneg = float32_chs(b->f[i]);
714             int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
715             int le = le_rel != float_relation_greater;
716             int ge = ge_rel != float_relation_less;
717
718             r->u32[i] = ((!le) << 31) | ((!ge) << 30);
719             all_in |= (!le | !ge);
720         }
721     }
722     if (record) {
723         env->crf[6] = (all_in == 0) << 1;
724     }
725 }
726
727 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
728 {
729     vcmpbfp_internal(env, r, a, b, 0);
730 }
731
732 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
733                         ppc_avr_t *b)
734 {
735     vcmpbfp_internal(env, r, a, b, 1);
736 }
737
738 #define VCT(suffix, satcvt, element)                                    \
739     void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
740                             ppc_avr_t *b, uint32_t uim)                 \
741     {                                                                   \
742         int i;                                                          \
743         int sat = 0;                                                    \
744         float_status s = env->vec_status;                               \
745                                                                         \
746         set_float_rounding_mode(float_round_to_zero, &s);               \
747         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
748             if (float32_is_any_nan(b->f[i])) {                          \
749                 r->element[i] = 0;                                      \
750             } else {                                                    \
751                 float64 t = float32_to_float64(b->f[i], &s);            \
752                 int64_t j;                                              \
753                                                                         \
754                 t = float64_scalbn(t, uim, &s);                         \
755                 j = float64_to_int64(t, &s);                            \
756                 r->element[i] = satcvt(j, &sat);                        \
757             }                                                           \
758         }                                                               \
759         if (sat) {                                                      \
760             env->vscr |= (1 << VSCR_SAT);                               \
761         }                                                               \
762     }
763 VCT(uxs, cvtsduw, u32)
764 VCT(sxs, cvtsdsw, s32)
765 #undef VCT
766
767 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
768                       ppc_avr_t *b, ppc_avr_t *c)
769 {
770     int sat = 0;
771     int i;
772
773     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
774         int32_t prod = a->s16[i] * b->s16[i];
775         int32_t t = (int32_t)c->s16[i] + (prod >> 15);
776
777         r->s16[i] = cvtswsh(t, &sat);
778     }
779
780     if (sat) {
781         env->vscr |= (1 << VSCR_SAT);
782     }
783 }
784
785 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
786                        ppc_avr_t *b, ppc_avr_t *c)
787 {
788     int sat = 0;
789     int i;
790
791     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
792         int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
793         int32_t t = (int32_t)c->s16[i] + (prod >> 15);
794         r->s16[i] = cvtswsh(t, &sat);
795     }
796
797     if (sat) {
798         env->vscr |= (1 << VSCR_SAT);
799     }
800 }
801
802 #define VMINMAX_DO(name, compare, element)                              \
803     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
804     {                                                                   \
805         int i;                                                          \
806                                                                         \
807         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
808             if (a->element[i] compare b->element[i]) {                  \
809                 r->element[i] = b->element[i];                          \
810             } else {                                                    \
811                 r->element[i] = a->element[i];                          \
812             }                                                           \
813         }                                                               \
814     }
815 #define VMINMAX(suffix, element)                \
816     VMINMAX_DO(min##suffix, >, element)         \
817     VMINMAX_DO(max##suffix, <, element)
818 VMINMAX(sb, s8)
819 VMINMAX(sh, s16)
820 VMINMAX(sw, s32)
821 VMINMAX(sd, s64)
822 VMINMAX(ub, u8)
823 VMINMAX(uh, u16)
824 VMINMAX(uw, u32)
825 VMINMAX(ud, u64)
826 #undef VMINMAX_DO
827 #undef VMINMAX
828
829 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
830 {
831     int i;
832
833     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
834         int32_t prod = a->s16[i] * b->s16[i];
835         r->s16[i] = (int16_t) (prod + c->s16[i]);
836     }
837 }
838
839 #define VMRG_DO(name, element, highp)                                   \
840     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
841     {                                                                   \
842         ppc_avr_t result;                                               \
843         int i;                                                          \
844         size_t n_elems = ARRAY_SIZE(r->element);                        \
845                                                                         \
846         for (i = 0; i < n_elems / 2; i++) {                             \
847             if (highp) {                                                \
848                 result.element[i*2+HI_IDX] = a->element[i];             \
849                 result.element[i*2+LO_IDX] = b->element[i];             \
850             } else {                                                    \
851                 result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
852                     b->element[n_elems - i - 1];                        \
853                 result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
854                     a->element[n_elems - i - 1];                        \
855             }                                                           \
856         }                                                               \
857         *r = result;                                                    \
858     }
859 #if defined(HOST_WORDS_BIGENDIAN)
860 #define MRGHI 0
861 #define MRGLO 1
862 #else
863 #define MRGHI 1
864 #define MRGLO 0
865 #endif
866 #define VMRG(suffix, element)                   \
867     VMRG_DO(mrgl##suffix, element, MRGHI)       \
868     VMRG_DO(mrgh##suffix, element, MRGLO)
869 VMRG(b, u8)
870 VMRG(h, u16)
871 VMRG(w, u32)
872 #undef VMRG_DO
873 #undef VMRG
874 #undef MRGHI
875 #undef MRGLO
876
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878                      ppc_avr_t *b, ppc_avr_t *c)
879 {
880     int32_t prod[16];
881     int i;
882
883     for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884         prod[i] = (int32_t)a->s8[i] * b->u8[i];
885     }
886
887     VECTOR_FOR_INORDER_I(i, s32) {
888         r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889             prod[4 * i + 2] + prod[4 * i + 3];
890     }
891 }
892
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894                      ppc_avr_t *b, ppc_avr_t *c)
895 {
896     int32_t prod[8];
897     int i;
898
899     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900         prod[i] = a->s16[i] * b->s16[i];
901     }
902
903     VECTOR_FOR_INORDER_I(i, s32) {
904         r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
905     }
906 }
907
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909                      ppc_avr_t *b, ppc_avr_t *c)
910 {
911     int32_t prod[8];
912     int i;
913     int sat = 0;
914
915     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916         prod[i] = (int32_t)a->s16[i] * b->s16[i];
917     }
918
919     VECTOR_FOR_INORDER_I(i, s32) {
920         int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
921
922         r->u32[i] = cvtsdsw(t, &sat);
923     }
924
925     if (sat) {
926         env->vscr |= (1 << VSCR_SAT);
927     }
928 }
929
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931                      ppc_avr_t *b, ppc_avr_t *c)
932 {
933     uint16_t prod[16];
934     int i;
935
936     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937         prod[i] = a->u8[i] * b->u8[i];
938     }
939
940     VECTOR_FOR_INORDER_I(i, u32) {
941         r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942             prod[4 * i + 2] + prod[4 * i + 3];
943     }
944 }
945
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947                      ppc_avr_t *b, ppc_avr_t *c)
948 {
949     uint32_t prod[8];
950     int i;
951
952     for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953         prod[i] = a->u16[i] * b->u16[i];
954     }
955
956     VECTOR_FOR_INORDER_I(i, u32) {
957         r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
958     }
959 }
960
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962                      ppc_avr_t *b, ppc_avr_t *c)
963 {
964     uint32_t prod[8];
965     int i;
966     int sat = 0;
967
968     for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969         prod[i] = a->u16[i] * b->u16[i];
970     }
971
972     VECTOR_FOR_INORDER_I(i, s32) {
973         uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
974
975         r->u32[i] = cvtuduw(t, &sat);
976     }
977
978     if (sat) {
979         env->vscr |= (1 << VSCR_SAT);
980     }
981 }
982
983 #define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
984     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
985     {                                                                   \
986         int i;                                                          \
987                                                                         \
988         VECTOR_FOR_INORDER_I(i, prod_element) {                         \
989             if (evenp) {                                                \
990                 r->prod_element[i] =                                    \
991                     (cast)a->mul_element[i * 2 + HI_IDX] *              \
992                     (cast)b->mul_element[i * 2 + HI_IDX];               \
993             } else {                                                    \
994                 r->prod_element[i] =                                    \
995                     (cast)a->mul_element[i * 2 + LO_IDX] *              \
996                     (cast)b->mul_element[i * 2 + LO_IDX];               \
997             }                                                           \
998         }                                                               \
999     }
1000 #define VMUL(suffix, mul_element, prod_element, cast)            \
1001     VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
1002     VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1003 VMUL(sb, s8, s16, int16_t)
1004 VMUL(sh, s16, s32, int32_t)
1005 VMUL(sw, s32, s64, int64_t)
1006 VMUL(ub, u8, u16, uint16_t)
1007 VMUL(uh, u16, u32, uint32_t)
1008 VMUL(uw, u32, u64, uint64_t)
1009 #undef VMUL_DO
1010 #undef VMUL
1011
1012 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1013                   ppc_avr_t *c)
1014 {
1015     ppc_avr_t result;
1016     int i;
1017
1018     VECTOR_FOR_INORDER_I(i, u8) {
1019         int s = c->u8[i] & 0x1f;
1020 #if defined(HOST_WORDS_BIGENDIAN)
1021         int index = s & 0xf;
1022 #else
1023         int index = 15 - (s & 0xf);
1024 #endif
1025
1026         if (s & 0x10) {
1027             result.u8[i] = b->u8[index];
1028         } else {
1029             result.u8[i] = a->u8[index];
1030         }
1031     }
1032     *r = result;
1033 }
1034
1035 #if defined(HOST_WORDS_BIGENDIAN)
1036 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1037 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1038 #else
1039 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1040 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1041 #endif
1042
1043 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1044 {
1045     int i;
1046     uint64_t perm = 0;
1047
1048     VECTOR_FOR_INORDER_I(i, u8) {
1049         int index = VBPERMQ_INDEX(b, i);
1050
1051         if (index < 128) {
1052             uint64_t mask = (1ull << (63-(index & 0x3F)));
1053             if (a->u64[VBPERMQ_DW(index)] & mask) {
1054                 perm |= (0x8000 >> i);
1055             }
1056         }
1057     }
1058
1059     r->u64[HI_IDX] = perm;
1060     r->u64[LO_IDX] = 0;
1061 }
1062
1063 #undef VBPERMQ_INDEX
1064 #undef VBPERMQ_DW
1065
/*
 * VGBBD_MASKS[n] spreads the eight bits of n across a 64-bit word: bit k of
 * n (k = 0..7) becomes bit 8 * k + 7, i.e. the top bit of byte k.  Examples:
 * [0x01] = 0x80, [0x02] = 0x8000, [0x80] = 0x8000000000000000.
 *
 * The 256 entries are generated at compile time by the helper macros below
 * rather than being written out one by one.
 */
#define VGBBD_BIT(n, k) ((((uint64_t)(n) >> (k)) & 1) << (8 * (k) + 7))
#define VGBBD_1(n)                                                      \
    (VGBBD_BIT(n, 0) | VGBBD_BIT(n, 1) | VGBBD_BIT(n, 2) |              \
     VGBBD_BIT(n, 3) | VGBBD_BIT(n, 4) | VGBBD_BIT(n, 5) |              \
     VGBBD_BIT(n, 6) | VGBBD_BIT(n, 7))
#define VGBBD_4(n)                                                      \
    VGBBD_1(n), VGBBD_1((n) + 1), VGBBD_1((n) + 2), VGBBD_1((n) + 3)
#define VGBBD_16(n)                                                     \
    VGBBD_4(n), VGBBD_4((n) + 4), VGBBD_4((n) + 8), VGBBD_4((n) + 12)
#define VGBBD_64(n)                                                     \
    VGBBD_16(n), VGBBD_16((n) + 16), VGBBD_16((n) + 32), VGBBD_16((n) + 48)

static const uint64_t VGBBD_MASKS[256] = {
    VGBBD_64(0x00), /* 00 .. 3F */
    VGBBD_64(0x40), /* 40 .. 7F */
    VGBBD_64(0x80), /* 80 .. BF */
    VGBBD_64(0xC0), /* C0 .. FF */
};

#undef VGBBD_64
#undef VGBBD_16
#undef VGBBD_4
#undef VGBBD_1
#undef VGBBD_BIT
1324
1325 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1326 {
1327     int i;
1328     uint64_t t[2] = { 0, 0 };
1329
1330     VECTOR_FOR_INORDER_I(i, u8) {
1331 #if defined(HOST_WORDS_BIGENDIAN)
1332         t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1333 #else
1334         t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1335 #endif
1336     }
1337
1338     r->u64[0] = t[0];
1339     r->u64[1] = t[1];
1340 }
1341
1342 #define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1343 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1344 {                                                             \
1345     int i, j;                                                 \
1346     trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
1347                                                               \
1348     VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1349         prod[i] = 0;                                          \
1350         for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1351             if (a->srcfld[i] & (1ull<<j)) {                   \
1352                 prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1353             }                                                 \
1354         }                                                     \
1355     }                                                         \
1356                                                               \
1357     VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1358         r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
1359     }                                                         \
1360 }
1361
1362 PMSUM(vpmsumb, u8, u16, uint16_t)
1363 PMSUM(vpmsumh, u16, u32, uint32_t)
1364 PMSUM(vpmsumw, u32, u64, uint64_t)
1365
1366 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1367 {
1368
1369 #ifdef CONFIG_INT128
1370     int i, j;
1371     __uint128_t prod[2];
1372
1373     VECTOR_FOR_INORDER_I(i, u64) {
1374         prod[i] = 0;
1375         for (j = 0; j < 64; j++) {
1376             if (a->u64[i] & (1ull<<j)) {
1377                 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1378             }
1379         }
1380     }
1381
1382     r->u128 = prod[0] ^ prod[1];
1383
1384 #else
1385     int i, j;
1386     ppc_avr_t prod[2];
1387
1388     VECTOR_FOR_INORDER_I(i, u64) {
1389         prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1390         for (j = 0; j < 64; j++) {
1391             if (a->u64[i] & (1ull<<j)) {
1392                 ppc_avr_t bshift;
1393                 if (j == 0) {
1394                     bshift.u64[HI_IDX] = 0;
1395                     bshift.u64[LO_IDX] = b->u64[i];
1396                 } else {
1397                     bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1398                     bshift.u64[LO_IDX] = b->u64[i] << j;
1399                 }
1400                 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1401                 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1402             }
1403         }
1404     }
1405
1406     r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1407     r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1408 #endif
1409 }
1410
1411
/* PKBIG is 1 when HOST_WORDS_BIGENDIAN is defined and 0 otherwise. */
#ifdef HOST_WORDS_BIGENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
1417 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1418 {
1419     int i, j;
1420     ppc_avr_t result;
1421 #if defined(HOST_WORDS_BIGENDIAN)
1422     const ppc_avr_t *x[2] = { a, b };
1423 #else
1424     const ppc_avr_t *x[2] = { b, a };
1425 #endif
1426
1427     VECTOR_FOR_INORDER_I(i, u64) {
1428         VECTOR_FOR_INORDER_I(j, u32) {
1429             uint32_t e = x[i]->u32[j];
1430
1431             result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1432                                  ((e >> 6) & 0x3e0) |
1433                                  ((e >> 3) & 0x1f));
1434         }
1435     }
1436     *r = result;
1437 }
1438
1439 #define VPK(suffix, from, to, cvt, dosat)                               \
1440     void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1441                             ppc_avr_t *a, ppc_avr_t *b)                 \
1442     {                                                                   \
1443         int i;                                                          \
1444         int sat = 0;                                                    \
1445         ppc_avr_t result;                                               \
1446         ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1447         ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1448                                                                         \
1449         VECTOR_FOR_INORDER_I(i, from) {                                 \
1450             result.to[i] = cvt(a0->from[i], &sat);                      \
1451             result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
1452         }                                                               \
1453         *r = result;                                                    \
1454         if (dosat && sat) {                                             \
1455             env->vscr |= (1 << VSCR_SAT);                               \
1456         }                                                               \
1457     }
1458 #define I(x, y) (x)
1459 VPK(shss, s16, s8, cvtshsb, 1)
1460 VPK(shus, s16, u8, cvtshub, 1)
1461 VPK(swss, s32, s16, cvtswsh, 1)
1462 VPK(swus, s32, u16, cvtswuh, 1)
1463 VPK(sdss, s64, s32, cvtsdsw, 1)
1464 VPK(sdus, s64, u32, cvtsduw, 1)
1465 VPK(uhus, u16, u8, cvtuhub, 1)
1466 VPK(uwus, u32, u16, cvtuwuh, 1)
1467 VPK(udus, u64, u32, cvtuduw, 1)
1468 VPK(uhum, u16, u8, I, 0)
1469 VPK(uwum, u32, u16, I, 0)
1470 VPK(udum, u64, u32, I, 0)
1471 #undef I
1472 #undef VPK
1473 #undef PKBIG
1474
1475 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1476 {
1477     int i;
1478
1479     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1480         r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1481     }
1482 }
1483
1484 #define VRFI(suffix, rounding)                                  \
1485     void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1486                              ppc_avr_t *b)                      \
1487     {                                                           \
1488         int i;                                                  \
1489         float_status s = env->vec_status;                       \
1490                                                                 \
1491         set_float_rounding_mode(rounding, &s);                  \
1492         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
1493             r->f[i] = float32_round_to_int (b->f[i], &s);       \
1494         }                                                       \
1495     }
1496 VRFI(n, float_round_nearest_even)
1497 VRFI(m, float_round_down)
1498 VRFI(p, float_round_up)
1499 VRFI(z, float_round_to_zero)
1500 #undef VRFI
1501
1502 #define VROTATE(suffix, element, mask)                                  \
1503     void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1504     {                                                                   \
1505         int i;                                                          \
1506                                                                         \
1507         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1508             unsigned int shift = b->element[i] & mask;                  \
1509             r->element[i] = (a->element[i] << shift) |                  \
1510                 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1511         }                                                               \
1512     }
1513 VROTATE(b, u8, 0x7)
1514 VROTATE(h, u16, 0xF)
1515 VROTATE(w, u32, 0x1F)
1516 VROTATE(d, u64, 0x3F)
1517 #undef VROTATE
1518
1519 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1520 {
1521     int i;
1522
1523     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1524         float32 t = float32_sqrt(b->f[i], &env->vec_status);
1525
1526         r->f[i] = float32_div(float32_one, t, &env->vec_status);
1527     }
1528 }
1529
1530 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1531                  ppc_avr_t *c)
1532 {
1533     r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1534     r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1535 }
1536
1537 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1538 {
1539     int i;
1540
1541     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1542         r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1543     }
1544 }
1545
1546 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1547 {
1548     int i;
1549
1550     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1551         r->f[i] = float32_log2(b->f[i], &env->vec_status);
1552     }
1553 }
1554
1555 /* The specification says that the results are undefined if all of the
1556  * shift counts are not identical.  We check to make sure that they are
1557  * to conform to what real hardware appears to do.  */
1558 #define VSHIFT(suffix, leftp)                                           \
1559     void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1560     {                                                                   \
1561         int shift = b->u8[LO_IDX*15] & 0x7;                             \
1562         int doit = 1;                                                   \
1563         int i;                                                          \
1564                                                                         \
1565         for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1566             doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1567         }                                                               \
1568         if (doit) {                                                     \
1569             if (shift == 0) {                                           \
1570                 *r = *a;                                                \
1571             } else if (leftp) {                                         \
1572                 uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
1573                                                                         \
1574                 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
1575                 r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
1576             } else {                                                    \
1577                 uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
1578                                                                         \
1579                 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
1580                 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
1581             }                                                           \
1582         }                                                               \
1583     }
1584 VSHIFT(l, 1)
1585 VSHIFT(r, 0)
1586 #undef VSHIFT
1587
1588 #define VSL(suffix, element, mask)                                      \
1589     void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1590     {                                                                   \
1591         int i;                                                          \
1592                                                                         \
1593         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1594             unsigned int shift = b->element[i] & mask;                  \
1595                                                                         \
1596             r->element[i] = a->element[i] << shift;                     \
1597         }                                                               \
1598     }
1599 VSL(b, u8, 0x7)
1600 VSL(h, u16, 0x0F)
1601 VSL(w, u32, 0x1F)
1602 VSL(d, u64, 0x3F)
1603 #undef VSL
1604
1605 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1606 {
1607     int sh = shift & 0xf;
1608     int i;
1609     ppc_avr_t result;
1610
1611 #if defined(HOST_WORDS_BIGENDIAN)
1612     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1613         int index = sh + i;
1614         if (index > 0xf) {
1615             result.u8[i] = b->u8[index - 0x10];
1616         } else {
1617             result.u8[i] = a->u8[index];
1618         }
1619     }
1620 #else
1621     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1622         int index = (16 - sh) + i;
1623         if (index > 0xf) {
1624             result.u8[i] = a->u8[index - 0x10];
1625         } else {
1626             result.u8[i] = b->u8[index];
1627         }
1628     }
1629 #endif
1630     *r = result;
1631 }
1632
1633 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1634 {
1635     int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1636
1637 #if defined(HOST_WORDS_BIGENDIAN)
1638     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1639     memset(&r->u8[16-sh], 0, sh);
1640 #else
1641     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1642     memset(&r->u8[0], 0, sh);
1643 #endif
1644 }
1645
1646 /* Experimental testing shows that hardware masks the immediate.  */
1647 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1648 #if defined(HOST_WORDS_BIGENDIAN)
1649 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1650 #else
1651 #define SPLAT_ELEMENT(element)                                  \
1652     (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1653 #endif
1654 #define VSPLT(suffix, element)                                          \
1655     void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1656     {                                                                   \
1657         uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
1658         int i;                                                          \
1659                                                                         \
1660         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1661             r->element[i] = s;                                          \
1662         }                                                               \
1663     }
1664 VSPLT(b, u8)
1665 VSPLT(h, u16)
1666 VSPLT(w, u32)
1667 #undef VSPLT
1668 #undef SPLAT_ELEMENT
1669 #undef _SPLAT_MASKED
1670
1671 #define VSPLTI(suffix, element, splat_type)                     \
1672     void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
1673     {                                                           \
1674         splat_type x = (int8_t)(splat << 3) >> 3;               \
1675         int i;                                                  \
1676                                                                 \
1677         for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
1678             r->element[i] = x;                                  \
1679         }                                                       \
1680     }
1681 VSPLTI(b, s8, int8_t)
1682 VSPLTI(h, s16, int16_t)
1683 VSPLTI(w, s32, int32_t)
1684 #undef VSPLTI
1685
1686 #define VSR(suffix, element, mask)                                      \
1687     void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1688     {                                                                   \
1689         int i;                                                          \
1690                                                                         \
1691         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1692             unsigned int shift = b->element[i] & mask;                  \
1693             r->element[i] = a->element[i] >> shift;                     \
1694         }                                                               \
1695     }
1696 VSR(ab, s8, 0x7)
1697 VSR(ah, s16, 0xF)
1698 VSR(aw, s32, 0x1F)
1699 VSR(ad, s64, 0x3F)
1700 VSR(b, u8, 0x7)
1701 VSR(h, u16, 0xF)
1702 VSR(w, u32, 0x1F)
1703 VSR(d, u64, 0x3F)
1704 #undef VSR
1705
1706 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1707 {
1708     int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1709
1710 #if defined(HOST_WORDS_BIGENDIAN)
1711     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1712     memset(&r->u8[0], 0, sh);
1713 #else
1714     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1715     memset(&r->u8[16 - sh], 0, sh);
1716 #endif
1717 }
1718
1719 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1720 {
1721     int i;
1722
1723     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1724         r->u32[i] = a->u32[i] >= b->u32[i];
1725     }
1726 }
1727
1728 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1729 {
1730     int64_t t;
1731     int i, upper;
1732     ppc_avr_t result;
1733     int sat = 0;
1734
1735 #if defined(HOST_WORDS_BIGENDIAN)
1736     upper = ARRAY_SIZE(r->s32)-1;
1737 #else
1738     upper = 0;
1739 #endif
1740     t = (int64_t)b->s32[upper];
1741     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1742         t += a->s32[i];
1743         result.s32[i] = 0;
1744     }
1745     result.s32[upper] = cvtsdsw(t, &sat);
1746     *r = result;
1747
1748     if (sat) {
1749         env->vscr |= (1 << VSCR_SAT);
1750     }
1751 }
1752
1753 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1754 {
1755     int i, j, upper;
1756     ppc_avr_t result;
1757     int sat = 0;
1758
1759 #if defined(HOST_WORDS_BIGENDIAN)
1760     upper = 1;
1761 #else
1762     upper = 0;
1763 #endif
1764     for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1765         int64_t t = (int64_t)b->s32[upper + i * 2];
1766
1767         result.u64[i] = 0;
1768         for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1769             t += a->s32[2 * i + j];
1770         }
1771         result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1772     }
1773
1774     *r = result;
1775     if (sat) {
1776         env->vscr |= (1 << VSCR_SAT);
1777     }
1778 }
1779
1780 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1781 {
1782     int i, j;
1783     int sat = 0;
1784
1785     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1786         int64_t t = (int64_t)b->s32[i];
1787
1788         for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1789             t += a->s8[4 * i + j];
1790         }
1791         r->s32[i] = cvtsdsw(t, &sat);
1792     }
1793
1794     if (sat) {
1795         env->vscr |= (1 << VSCR_SAT);
1796     }
1797 }
1798
1799 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1800 {
1801     int sat = 0;
1802     int i;
1803
1804     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1805         int64_t t = (int64_t)b->s32[i];
1806
1807         t += a->s16[2 * i] + a->s16[2 * i + 1];
1808         r->s32[i] = cvtsdsw(t, &sat);
1809     }
1810
1811     if (sat) {
1812         env->vscr |= (1 << VSCR_SAT);
1813     }
1814 }
1815
1816 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1817 {
1818     int i, j;
1819     int sat = 0;
1820
1821     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1822         uint64_t t = (uint64_t)b->u32[i];
1823
1824         for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1825             t += a->u8[4 * i + j];
1826         }
1827         r->u32[i] = cvtuduw(t, &sat);
1828     }
1829
1830     if (sat) {
1831         env->vscr |= (1 << VSCR_SAT);
1832     }
1833 }
1834
1835 #if defined(HOST_WORDS_BIGENDIAN)
1836 #define UPKHI 1
1837 #define UPKLO 0
1838 #else
1839 #define UPKHI 0
1840 #define UPKLO 1
1841 #endif
1842 #define VUPKPX(suffix, hi)                                              \
1843     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1844     {                                                                   \
1845         int i;                                                          \
1846         ppc_avr_t result;                                               \
1847                                                                         \
1848         for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
1849             uint16_t e = b->u16[hi ? i : i+4];                          \
1850             uint8_t a = (e >> 15) ? 0xff : 0;                           \
1851             uint8_t r = (e >> 10) & 0x1f;                               \
1852             uint8_t g = (e >> 5) & 0x1f;                                \
1853             uint8_t b = e & 0x1f;                                       \
1854                                                                         \
1855             result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
1856         }                                                               \
1857         *r = result;                                                    \
1858     }
1859 VUPKPX(lpx, UPKLO)
1860 VUPKPX(hpx, UPKHI)
1861 #undef VUPKPX
1862
1863 #define VUPK(suffix, unpacked, packee, hi)                              \
1864     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1865     {                                                                   \
1866         int i;                                                          \
1867         ppc_avr_t result;                                               \
1868                                                                         \
1869         if (hi) {                                                       \
1870             for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
1871                 result.unpacked[i] = b->packee[i];                      \
1872             }                                                           \
1873         } else {                                                        \
1874             for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1875                  i++) {                                                 \
1876                 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1877             }                                                           \
1878         }                                                               \
1879         *r = result;                                                    \
1880     }
1881 VUPK(hsb, s16, s8, UPKHI)
1882 VUPK(hsh, s32, s16, UPKHI)
1883 VUPK(hsw, s64, s32, UPKHI)
1884 VUPK(lsb, s16, s8, UPKLO)
1885 VUPK(lsh, s32, s16, UPKLO)
1886 VUPK(lsw, s64, s32, UPKLO)
1887 #undef VUPK
1888 #undef UPKHI
1889 #undef UPKLO
1890
1891 #define VGENERIC_DO(name, element)                                      \
1892     void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
1893     {                                                                   \
1894         int i;                                                          \
1895                                                                         \
1896         VECTOR_FOR_INORDER_I(i, element) {                              \
1897             r->element[i] = name(b->element[i]);                        \
1898         }                                                               \
1899     }
1900
1901 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1902 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1903 #define clzw(v) clz32((v))
1904 #define clzd(v) clz64((v))
1905
1906 VGENERIC_DO(clzb, u8)
1907 VGENERIC_DO(clzh, u16)
1908 VGENERIC_DO(clzw, u32)
1909 VGENERIC_DO(clzd, u64)
1910
1911 #undef clzb
1912 #undef clzh
1913 #undef clzw
1914 #undef clzd
1915
1916 #define popcntb(v) ctpop8(v)
1917 #define popcnth(v) ctpop16(v)
1918 #define popcntw(v) ctpop32(v)
1919 #define popcntd(v) ctpop64(v)
1920
1921 VGENERIC_DO(popcntb, u8)
1922 VGENERIC_DO(popcnth, u16)
1923 VGENERIC_DO(popcntw, u32)
1924 VGENERIC_DO(popcntd, u64)
1925
1926 #undef popcntb
1927 #undef popcnth
1928 #undef popcntw
1929 #undef popcntd
1930
1931 #undef VGENERIC_DO
1932
/* Initializer for a ppc_avr_t holding the 128-bit value 1 on this host. */
#if !defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 1, 0 } }
#else
#define QW_ONE { .u64 = { 0, 1 } }
#endif
1938
1939 #ifndef CONFIG_INT128
1940
1941 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1942 {
1943     t->u64[0] = ~a.u64[0];
1944     t->u64[1] = ~a.u64[1];
1945 }
1946
1947 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1948 {
1949     if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1950         return -1;
1951     } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1952         return 1;
1953     } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1954         return -1;
1955     } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1956         return 1;
1957     } else {
1958         return 0;
1959     }
1960 }
1961
1962 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1963 {
1964     t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1965     t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1966                      (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1967 }
1968
1969 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1970 {
1971     ppc_avr_t not_a;
1972     t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1973     t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1974                      (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1975     avr_qw_not(&not_a, a);
1976     return avr_qw_cmpu(not_a, b) < 0;
1977 }
1978
1979 #endif
1980
1981 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1982 {
1983 #ifdef CONFIG_INT128
1984     r->u128 = a->u128 + b->u128;
1985 #else
1986     avr_qw_add(r, *a, *b);
1987 #endif
1988 }
1989
1990 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1991 {
1992 #ifdef CONFIG_INT128
1993     r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1994 #else
1995
1996     if (c->u64[LO_IDX] & 1) {
1997         ppc_avr_t tmp;
1998
1999         tmp.u64[HI_IDX] = 0;
2000         tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2001         avr_qw_add(&tmp, *a, tmp);
2002         avr_qw_add(r, tmp, *b);
2003     } else {
2004         avr_qw_add(r, *a, *b);
2005     }
2006 #endif
2007 }
2008
2009 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2010 {
2011 #ifdef CONFIG_INT128
2012     r->u128 = (~a->u128 < b->u128);
2013 #else
2014     ppc_avr_t not_a;
2015
2016     avr_qw_not(&not_a, *a);
2017
2018     r->u64[HI_IDX] = 0;
2019     r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2020 #endif
2021 }
2022
2023 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2024 {
2025 #ifdef CONFIG_INT128
2026     int carry_out = (~a->u128 < b->u128);
2027     if (!carry_out && (c->u128 & 1)) {
2028         carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2029                     ((a->u128 != 0) || (b->u128 != 0));
2030     }
2031     r->u128 = carry_out;
2032 #else
2033
2034     int carry_in = c->u64[LO_IDX] & 1;
2035     int carry_out = 0;
2036     ppc_avr_t tmp;
2037
2038     carry_out = avr_qw_addc(&tmp, *a, *b);
2039
2040     if (!carry_out && carry_in) {
2041         ppc_avr_t one = QW_ONE;
2042         carry_out = avr_qw_addc(&tmp, tmp, one);
2043     }
2044     r->u64[HI_IDX] = 0;
2045     r->u64[LO_IDX] = carry_out;
2046 #endif
2047 }
2048
2049 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2050 {
2051 #ifdef CONFIG_INT128
2052     r->u128 = a->u128 - b->u128;
2053 #else
2054     ppc_avr_t tmp;
2055     ppc_avr_t one = QW_ONE;
2056
2057     avr_qw_not(&tmp, *b);
2058     avr_qw_add(&tmp, *a, tmp);
2059     avr_qw_add(r, tmp, one);
2060 #endif
2061 }
2062
2063 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2064 {
2065 #ifdef CONFIG_INT128
2066     r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2067 #else
2068     ppc_avr_t tmp, sum;
2069
2070     avr_qw_not(&tmp, *b);
2071     avr_qw_add(&sum, *a, tmp);
2072
2073     tmp.u64[HI_IDX] = 0;
2074     tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2075     avr_qw_add(r, sum, tmp);
2076 #endif
2077 }
2078
2079 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2080 {
2081 #ifdef CONFIG_INT128
2082     r->u128 = (~a->u128 < ~b->u128) ||
2083                  (a->u128 + ~b->u128 == (__uint128_t)-1);
2084 #else
2085     int carry = (avr_qw_cmpu(*a, *b) > 0);
2086     if (!carry) {
2087         ppc_avr_t tmp;
2088         avr_qw_not(&tmp, *b);
2089         avr_qw_add(&tmp, *a, tmp);
2090         carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2091     }
2092     r->u64[HI_IDX] = 0;
2093     r->u64[LO_IDX] = carry;
2094 #endif
2095 }
2096
2097 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2098 {
2099 #ifdef CONFIG_INT128
2100     r->u128 =
2101         (~a->u128 < ~b->u128) ||
2102         ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2103 #else
2104     int carry_in = c->u64[LO_IDX] & 1;
2105     int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2106     if (!carry_out && carry_in) {
2107         ppc_avr_t tmp;
2108         avr_qw_not(&tmp, *b);
2109         avr_qw_add(&tmp, *a, tmp);
2110         carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2111     }
2112
2113     r->u64[HI_IDX] = 0;
2114     r->u64[LO_IDX] = carry_out;
2115 #endif
2116 }
2117
/*
 * Sign codes recognised in the sign nibble (digit 0) of a packed decimal
 * value: four codes are treated as "plus" and two as "minus".
 */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE

/*
 * Index into u8[] of the byte holding nibble n (two nibbles per byte).
 * The argument is parenthesised so that an expression such as i + 1 is
 * divided as a whole rather than only its last operand.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
2130
2131 static int bcd_get_sgn(ppc_avr_t *bcd)
2132 {
2133     switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2134     case BCD_PLUS_PREF_1:
2135     case BCD_PLUS_PREF_2:
2136     case BCD_PLUS_ALT_1:
2137     case BCD_PLUS_ALT_2:
2138     {
2139         return 1;
2140     }
2141
2142     case BCD_NEG_PREF:
2143     case BCD_NEG_ALT:
2144     {
2145         return -1;
2146     }
2147
2148     default:
2149     {
2150         return 0;
2151     }
2152     }
2153 }
2154
2155 static int bcd_preferred_sgn(int sgn, int ps)
2156 {
2157     if (sgn >= 0) {
2158         return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2159     } else {
2160         return BCD_NEG_PREF;
2161     }
2162 }
2163
2164 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2165 {
2166     uint8_t result;
2167     if (n & 1) {
2168         result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2169     } else {
2170        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2171     }
2172
2173     if (unlikely(result > 9)) {
2174         *invalid = true;
2175     }
2176     return result;
2177 }
2178
2179 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2180 {
2181     if (n & 1) {
2182         bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2183         bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2184     } else {
2185         bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2186         bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2187     }
2188 }
2189
2190 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2191 {
2192     int i;
2193     int invalid = 0;
2194     for (i = 31; i > 0; i--) {
2195         uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2196         uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2197         if (unlikely(invalid)) {
2198             return 0; /* doesn't matter */
2199         } else if (dig_a > dig_b) {
2200             return 1;
2201         } else if (dig_a < dig_b) {
2202             return -1;
2203         }
2204     }
2205
2206     return 0;
2207 }
2208
2209 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2210                        int *overflow)
2211 {
2212     int carry = 0;
2213     int i;
2214     int is_zero = 1;
2215     for (i = 1; i <= 31; i++) {
2216         uint8_t digit = bcd_get_digit(a, i, invalid) +
2217                         bcd_get_digit(b, i, invalid) + carry;
2218         is_zero &= (digit == 0);
2219         if (digit > 9) {
2220             carry = 1;
2221             digit -= 10;
2222         } else {
2223             carry = 0;
2224         }
2225
2226         bcd_put_digit(t, digit, i);
2227
2228         if (unlikely(*invalid)) {
2229             return -1;
2230         }
2231     }
2232
2233     *overflow = carry;
2234     return is_zero;
2235 }
2236
2237 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2238                        int *overflow)
2239 {
2240     int carry = 0;
2241     int i;
2242     int is_zero = 1;
2243     for (i = 1; i <= 31; i++) {
2244         uint8_t digit = bcd_get_digit(a, i, invalid) -
2245                         bcd_get_digit(b, i, invalid) + carry;
2246         is_zero &= (digit == 0);
2247         if (digit & 0x80) {
2248             carry = -1;
2249             digit += 10;
2250         } else {
2251             carry = 0;
2252         }
2253
2254         bcd_put_digit(t, digit, i);
2255
2256         if (unlikely(*invalid)) {
2257             return -1;
2258         }
2259     }
2260
2261     *overflow = carry;
2262     return is_zero;
2263 }
2264
2265 uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2266 {
2267
2268     int sgna = bcd_get_sgn(a);
2269     int sgnb = bcd_get_sgn(b);
2270     int invalid = (sgna == 0) || (sgnb == 0);
2271     int overflow = 0;
2272     int zero = 0;
2273     uint32_t cr = 0;
2274     ppc_avr_t result = { .u64 = { 0, 0 } };
2275
2276     if (!invalid) {
2277         if (sgna == sgnb) {
2278             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2279             zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2280             cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2281         } else if (bcd_cmp_mag(a, b) > 0) {
2282             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2283             zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2284             cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2285         } else {
2286             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2287             zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2288             cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2289         }
2290     }
2291
2292     if (unlikely(invalid)) {
2293         result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2294         cr = 1 << CRF_SO;
2295     } else if (overflow) {
2296         cr |= 1 << CRF_SO;
2297     } else if (zero) {
2298         cr = 1 << CRF_EQ;
2299     }
2300
2301     *r = result;
2302
2303     return cr;
2304 }
2305
2306 uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2307 {
2308     ppc_avr_t bcopy = *b;
2309     int sgnb = bcd_get_sgn(b);
2310     if (sgnb < 0) {
2311         bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2312     } else if (sgnb > 0) {
2313         bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2314     }
2315     /* else invalid ... defer to bcdadd code for proper handling */
2316
2317     return helper_bcdadd(r, a, &bcopy, ps);
2318 }
2319
2320 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2321 {
2322     int i;
2323     VECTOR_FOR_INORDER_I(i, u8) {
2324         r->u8[i] = AES_sbox[a->u8[i]];
2325     }
2326 }
2327
2328 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2329 {
2330     int i;
2331
2332     VECTOR_FOR_INORDER_I(i, u32) {
2333         r->AVRW(i) = b->AVRW(i) ^
2334             (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2335              AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2336              AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2337              AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2338     }
2339 }
2340
2341 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2342 {
2343     int i;
2344
2345     VECTOR_FOR_INORDER_I(i, u8) {
2346         r->AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2347     }
2348 }
2349
2350 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2351 {
2352     /* This differs from what is written in ISA V2.07.  The RTL is */
2353     /* incorrect and will be fixed in V2.07B.                      */
2354     int i;
2355     ppc_avr_t tmp;
2356
2357     VECTOR_FOR_INORDER_I(i, u8) {
2358         tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2359     }
2360
2361     VECTOR_FOR_INORDER_I(i, u32) {
2362         r->AVRW(i) =
2363             AES_imc[tmp.AVRB(4*i + 0)][0] ^
2364             AES_imc[tmp.AVRB(4*i + 1)][1] ^
2365             AES_imc[tmp.AVRB(4*i + 2)][2] ^
2366             AES_imc[tmp.AVRB(4*i + 3)][3];
2367     }
2368 }
2369
2370 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2371 {
2372     int i;
2373
2374     VECTOR_FOR_INORDER_I(i, u8) {
2375         r->AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2376     }
2377 }
2378
2379 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2380 #if defined(HOST_WORDS_BIGENDIAN)
2381 #define EL_IDX(i) (i)
2382 #else
2383 #define EL_IDX(i) (3 - (i))
2384 #endif
2385
2386 void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2387 {
2388     int st = (st_six & 0x10) != 0;
2389     int six = st_six & 0xF;
2390     int i;
2391
2392     VECTOR_FOR_INORDER_I(i, u32) {
2393         if (st == 0) {
2394             if ((six & (0x8 >> i)) == 0) {
2395                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2396                                     ROTRu32(a->u32[EL_IDX(i)], 18) ^
2397                                     (a->u32[EL_IDX(i)] >> 3);
2398             } else { /* six.bit[i] == 1 */
2399                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2400                                     ROTRu32(a->u32[EL_IDX(i)], 19) ^
2401                                     (a->u32[EL_IDX(i)] >> 10);
2402             }
2403         } else { /* st == 1 */
2404             if ((six & (0x8 >> i)) == 0) {
2405                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2406                                     ROTRu32(a->u32[EL_IDX(i)], 13) ^
2407                                     ROTRu32(a->u32[EL_IDX(i)], 22);
2408             } else { /* six.bit[i] == 1 */
2409                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2410                                     ROTRu32(a->u32[EL_IDX(i)], 11) ^
2411                                     ROTRu32(a->u32[EL_IDX(i)], 25);
2412             }
2413         }
2414     }
2415 }
2416
2417 #undef ROTRu32
2418 #undef EL_IDX
2419
2420 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2421 #if defined(HOST_WORDS_BIGENDIAN)
2422 #define EL_IDX(i) (i)
2423 #else
2424 #define EL_IDX(i) (1 - (i))
2425 #endif
2426
2427 void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2428 {
2429     int st = (st_six & 0x10) != 0;
2430     int six = st_six & 0xF;
2431     int i;
2432
2433     VECTOR_FOR_INORDER_I(i, u64) {
2434         if (st == 0) {
2435             if ((six & (0x8 >> (2*i))) == 0) {
2436                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2437                                     ROTRu64(a->u64[EL_IDX(i)], 8) ^
2438                                     (a->u64[EL_IDX(i)] >> 7);
2439             } else { /* six.bit[2*i] == 1 */
2440                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2441                                     ROTRu64(a->u64[EL_IDX(i)], 61) ^
2442                                     (a->u64[EL_IDX(i)] >> 6);
2443             }
2444         } else { /* st == 1 */
2445             if ((six & (0x8 >> (2*i))) == 0) {
2446                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2447                                     ROTRu64(a->u64[EL_IDX(i)], 34) ^
2448                                     ROTRu64(a->u64[EL_IDX(i)], 39);
2449             } else { /* six.bit[2*i] == 1 */
2450                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2451                                     ROTRu64(a->u64[EL_IDX(i)], 18) ^
2452                                     ROTRu64(a->u64[EL_IDX(i)], 41);
2453             }
2454         }
2455     }
2456 }
2457
2458 #undef ROTRu64
2459 #undef EL_IDX
2460
2461 void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2462 {
2463     int i;
2464     VECTOR_FOR_INORDER_I(i, u8) {
2465         int indexA = c->u8[i] >> 4;
2466         int indexB = c->u8[i] & 0xF;
2467 #if defined(HOST_WORDS_BIGENDIAN)
2468         r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
2469 #else
2470         r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2471 #endif
2472     }
2473 }
2474
2475 #undef VECTOR_FOR_INORDER_I
2476 #undef HI_IDX
2477 #undef LO_IDX
2478
2479 /*****************************************************************************/
2480 /* SPE extension helpers */
2481 /* Use a table to make this quicker */
/* hbrev[x] is the 4-bit value x with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of one byte: swap the nibbles, reversing each. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: the bytes swap places and each is
 * bit-reversed.  Every reversed byte is widened to uint32_t before being
 * shifted; left as uint8_t it would be promoted to int, and shifting a
 * value >= 0x80 left by 24 overflows a signed int.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2497
2498 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2499 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2500 {
2501     uint32_t a, b, d, mask;
2502
2503     mask = UINT32_MAX >> (32 - MASKBITS);
2504     a = arg1 & mask;
2505     b = arg2 & mask;
2506     d = word_reverse(1 + word_reverse(a | ~b));
2507     return (arg1 & ~mask) | (d & b);
2508 }
2509
/*
 * Count the leading bits of val that equal its top bit: when bit 31 is set
 * the value is complemented first, then leading zeros are counted.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
2518
/* Count the leading zero bits of a 32-bit value via clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading = clz32(val);

    return leading;
}
2523
2524 /* 440 specific */
2525 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2526                           target_ulong low, uint32_t update_Rc)
2527 {
2528     target_ulong mask;
2529     int i;
2530
2531     i = 1;
2532     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2533         if ((high & mask) == 0) {
2534             if (update_Rc) {
2535                 env->crf[0] = 0x4;
2536             }
2537             goto done;
2538         }
2539         i++;
2540     }
2541     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2542         if ((low & mask) == 0) {
2543             if (update_Rc) {
2544                 env->crf[0] = 0x8;
2545             }
2546             goto done;
2547         }
2548         i++;
2549     }
2550     i = 8;
2551     if (update_Rc) {
2552         env->crf[0] = 0x2;
2553     }
2554  done:
2555     env->xer = (env->xer & ~0x7F) | i;
2556     if (update_Rc) {
2557         env->crf[0] |= xer_so;
2558     }
2559     return i;
2560 }