qemu/target-arm/translate.c
/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "cpu.h"
#include "internals.h"
#include "disas/disas.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
#include "arm_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"


#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    0
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)

#include "translate.h"
static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency.  */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
static TCGv_i32 cpu_R[16];
static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
static TCGv_i64 cpu_exclusive_addr;
static TCGv_i64 cpu_exclusive_val;
#ifdef CONFIG_USER_ONLY
static TCGv_i64 cpu_exclusive_test;
static TCGv_i32 cpu_exclusive_info;
#endif

/* FIXME:  These should be removed.  */
static TCGv_i32 cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;

#include "exec/gen-icount.h"

static const char *regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    int i;

    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");
#ifdef CONFIG_USER_ONLY
    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_test), "exclusive_test");
    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
        offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif

    a64_translate_init();
}

static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s)
{
    /* Return the mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_S1E2:        /* this one is UNPREDICTABLE */
    case ARMMMUIdx_S12NSE0:
    case ARMMMUIdx_S12NSE1:
        return ARMMMUIdx_S12NSE0;
    case ARMMMUIdx_S1E3:
    case ARMMMUIdx_S1SE0:
    case ARMMMUIdx_S1SE1:
        return ARMMMUIdx_S1SE0;
    case ARMMMUIdx_S2NS:
    default:
        g_assert_not_reached();
    }
}

static inline TCGv_i32 load_cpu_offset(int offset)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))

static inline void store_cpu_offset(TCGv_i32 var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    tcg_temp_free_i32(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUARMState, name))

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* normally, since we updated PC, we need only to add one insn */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv_i32 load_reg(DisasContext *s, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)


static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
{
    TCGv_i32 tmp_mask = tcg_const_i32(mask);
    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
    tcg_temp_free_i32(tmp_mask);
}
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);

    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}

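/* Dual signed 16x16->32 multiply: on return, a holds the product of the
 * low halfwords and b the product of the high halfwords. Both inputs
 * are overwritten.
 */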
static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 tmp1 = tcg_temp_new_i32();
    TCGv_i32 tmp2 = tcg_temp_new_i32();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    tcg_temp_free_i32(tmp1);
}

/* Byteswap each halfword.  */
static void gen_rev16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv_i32 var)
{
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_bswap16_i32(var, var);
    tcg_gen_ext16s_i32(var, var);
}

/* Unsigned bitfield extract.  */
static void gen_ubfx(TCGv_i32 var, int shift, uint32_t mask)
{
    if (shift)
        tcg_gen_shri_i32(var, var, shift);
    tcg_gen_andi_i32(var, var, mask);
}

/* Signed bitfield extract.  */
static void gen_sbfx(TCGv_i32 var, int shift, int width)
{
    uint32_t signbit;

    if (shift)
        tcg_gen_sari_i32(var, var, shift);
    if (shift + width < 32) {
        signbit = 1u << (width - 1);
        tcg_gen_andi_i32(var, var, (1u << width) - 1);
        tcg_gen_xori_i32(var, var, signbit);
        tcg_gen_subi_i32(var, var, signbit);
    }
}

/* Return (b << 32) + a. Mark inputs as dead */
static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
{
    TCGv_i64 tmp64 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp64, b);
    tcg_temp_free_i32(b);
    tcg_gen_shli_i64(tmp64, tmp64, 32);
    tcg_gen_add_i64(a, tmp64, a);

    tcg_temp_free_i64(tmp64);
    return a;
}

/* Return (b << 32) - a. Mark inputs as dead. */
static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
{
    TCGv_i64 tmp64 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp64, b);
    tcg_temp_free_i32(b);
    tcg_gen_shli_i64(tmp64, tmp64, 32);
    tcg_gen_sub_i64(a, tmp64, a);

    tcg_temp_free_i64(tmp64);
    return a;
}

/* 32x32->64 multiply.  Marks inputs as dead.  */
static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;

    tcg_gen_mulu2_i32(lo, hi, a, b);
    tcg_temp_free_i32(a);
    tcg_temp_free_i32(b);

    ret = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(ret, lo, hi);
    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);

    return ret;
}

static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;

    tcg_gen_muls2_i32(lo, hi, a, b);
    tcg_temp_free_i32(a);
    tcg_temp_free_i32(b);

    ret = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(ret, lo, hi);
    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);

    return ret;
}

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(t1);
}

/* Set CF to the top bit of var.  */
static void gen_set_CF_bit31(TCGv_i32 var)
{
    tcg_gen_shri_i32(cpu_CF, var, 31);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}

/* T0 += T1 + CF.  */
static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_add_i32(t0, t0, cpu_CF);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}

/* dest = T0 + T1. Compute C, N, V and Z flags */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
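/* Uses the backend's 32-bit add-with-carry-out op (add2) when available;
 * otherwise the sum is computed in 64 bits and split into NF (low word)
 * and CF (the carry in the high word).
 */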
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
        tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_not_i32(tmp, t1);
    gen_adc_CC(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}

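/* Variable shift helpers: only the bottom byte of t1 is significant, and
 * LSL/LSR by 32 or more must yield zero. The movcond below handles that
 * by substituting a zero source operand when (t1 & 0xff) > 31.
 */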
#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    tmp1 = tcg_temp_new_i32();                                        \
    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    tmp2 = tcg_const_i32(0);                                          \
    tmp3 = tcg_const_i32(0x1f);                                       \
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    tcg_temp_free_i32(tmp3);                                          \
    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    tcg_temp_free_i32(tmp2);                                          \
    tcg_temp_free_i32(tmp1);                                          \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT

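/* ASR by register: shift amounts of 32 or more are clamped to 31, which
 * gives the architecturally required fill with copies of the sign bit.
 */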
static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
    tcg_gen_andi_i32(tmp1, t1, 0xff);
    tmp2 = tcg_const_i32(0x1f);
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sar_i32(dest, t0, tmp1);
    tcg_temp_free_i32(tmp1);
}

static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
{
    TCGv_i32 c0 = tcg_const_i32(0);
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_neg_i32(tmp, src);
    tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
    tcg_temp_free_i32(c0);
    tcg_temp_free_i32(tmp);
}

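/* Set CF to bit <shift> of var, the carry-out of an immediate shift.  */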
static void shifter_out_im(TCGv_i32 var, int shift)
{
    if (shift == 0) {
        tcg_gen_andi_i32(cpu_CF, var, 1);
    } else {
        tcg_gen_shri_i32(cpu_CF, var, shift);
        if (shift != 31) {
            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
        }
    }
}

/* Shift by immediate.  Includes special handling for shift == 0.  */
static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
                                    int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(cpu_CF, var, 31);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rotri_i32(var, var, shift); break;
        } else {
            TCGv_i32 tmp = tcg_temp_new_i32();
            tcg_gen_shli_i32(tmp, cpu_CF, 31);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_or_i32(var, var, tmp);
            tcg_temp_free_i32(tmp);
        }
    }
}

static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
                                     TCGv_i32 shift, int flags)
{
    if (flags) {
        switch (shiftop) {
        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
        }
    } else {
        switch (shiftop) {
        case 0:
            gen_shl(var, var, shift);
            break;
        case 1:
            gen_shr(var, var, shift);
            break;
        case 2:
            gen_sar(var, var, shift);
            break;
        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
                tcg_gen_rotr_i32(var, var, shift); break;
        }
    }
    tcg_temp_free_i32(shift);
}

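/* Parallel add/subtract (ARM encoding): op1 selects the variant (s/u set
 * the GE flags and so take a pointer to them; q/uq saturate; sh/uh halve
 * the result), op2 selects the operation itself.
 */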
#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 5:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
#define PAS_OP(pfx) \
    switch (op1) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op2) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 4:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

/*
 * Generate a conditional branch based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
void arm_gen_test_cc(int cc, TCGLabel *label)
{
    TCGv_i32 tmp;
    TCGLabel *inv;

    switch (cc) {
    case 0: /* eq: Z */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
        break;
    case 1: /* ne: !Z */
        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
        break;
    case 2: /* cs: C */
        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
        break;
    case 3: /* cc: !C */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
        break;
    case 4: /* mi: N */
        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
        break;
    case 5: /* pl: !N */
        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
        break;
    case 6: /* vs: V */
        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
        break;
    case 7: /* vc: !V */
        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
        break;
    case 8: /* hi: C && !Z */
        inv = gen_new_label();
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
        gen_set_label(inv);
        break;
    case 9: /* ls: !C || Z */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
        break;
    case 10: /* ge: N == V -> N ^ V == 0 */
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        break;
    case 11: /* lt: N != V -> N ^ V != 0 */
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        break;
    case 12: /* gt: !Z && N == V */
        inv = gen_new_label();
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        gen_set_label(inv);
        break;
    case 13: /* le: Z || N != V */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        break;
    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }
}

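/* For each of the 16 ARM data-processing opcodes: 1 if it is a logical
 * op whose S-form sets N and Z from the result (C comes from the
 * shifter), 0 if it is an arithmetic op with its own flag handling.
 */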
static const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* andl */
    1, /* xorl */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};

/* Set PC and Thumb state from an immediate address.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv_i32 tmp;

    s->is_jmp = DISAS_UPDATE;
    if (s->thumb != (addr & 1)) {
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
        tcg_temp_free_i32(tmp);
    }
    tcg_gen_movi_i32(cpu_R[15], addr & ~1);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->is_jmp = DISAS_UPDATE;
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
}

/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above. The source must be a temporary
   and will be marked as dead. */
static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Variant of store_reg which uses branch&exchange logic when storing
 * to r15 in ARM architecture v5T and above. This is used for storing
 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_5) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero
 * extended if we're a 64 bit core) and data is also
 * 32 bits unless specifically doing a 64 bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except
 * that the address argument is TCGv_i32 rather than TCGv.
 */
#if TARGET_LONG_BITS == 32

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_ld_i32(val, addr, index, OPC);                          \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_st_i32(val, addr, index, OPC);                          \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_ld_i64(val, addr, index, MO_TEQ);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ);
}

#else

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_ld_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_st_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_ld_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

#endif

DO_GEN_LD(8s, MO_SB)
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16s, MO_TESW)
DO_GEN_LD(16u, MO_TEUW)
DO_GEN_LD(32u, MO_TEUL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_TEUW)
DO_GEN_ST(32, MO_TEUL)
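
/* The expansions above define gen_aa32_ld8s/ld8u/ld16s/ld16u/ld32u and
 * gen_aa32_st8/st16/st32; 64-bit accesses use the gen_aa32_ld64/st64
 * functions defined alongside the macros.
 */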

static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
    tcg_gen_movi_i32(cpu_R[15], val);
}

static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (ie before
     * the insn really executes).
     */
    gen_set_pc_im(s, s->pc - 4);
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_HVC;
}

static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    TCGv_i32 tmp;

    gen_set_pc_im(s, s->pc - 4);
    tmp = tcg_const_i32(syn_aa32_smc());
    gen_helper_pre_smc(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_SMC;
}

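/* If we are in a Thumb IT block, sync its condition and mask back into
 * env->condexec_bits, so that an exception raised at this point restarts
 * execution with the correct IT state.
 */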
static inline void
gen_set_condexec (DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

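/* Raise an exception for the insn at (pc - offset): the condexec state
 * and PC are written back to the CPU state first so that the exception
 * entry sees correct values.
 */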
static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_JUMP;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               int syn, uint32_t target_el)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc - offset);
    gen_exception(excp, syn, target_el);
    s->is_jmp = DISAS_JUMP;
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
    s->is_jmp = DISAS_UPDATE;
}

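/* Apply the addressing-mode offset of an ARM single data transfer insn
 * to var: a 12-bit immediate or a shifted register, added or subtracted
 * according to the U bit (bit 23). gen_add_datah_offset below is the
 * equivalent for the halfword-transfer forms with a split 4+4 immediate.
 */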
static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
                                       TCGv_i32 var)
{
    int val, rm, shift, shiftop;
    TCGv_i32 offset;

    if (!(insn & (1 << 25))) {
        /* immediate */
        val = insn & 0xfff;
        if (!(insn & (1 << 23)))
            val = -val;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* shift/register */
        rm = (insn) & 0xf;
        shift = (insn >> 7) & 0x1f;
        shiftop = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, shiftop, shift, 0);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        tcg_temp_free_i32(offset);
    }
}

static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra, TCGv_i32 var)
{
    int val, rm;
    TCGv_i32 offset;

    if (insn & (1 << 22)) {
        /* immediate */
        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
        if (!(insn & (1 << 23)))
            val = -val;
        val += extra;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* register */
        if (extra)
            tcg_gen_addi_i32(var, var, extra);
        rm = (insn) & 0xf;
        offset = load_reg(s, rm);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        tcg_temp_free_i32(offset);
    }
}

static TCGv_ptr get_fpstatus_ptr(int neon)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;
    if (neon) {
        offset = offsetof(CPUARMState, vfp.standard_fp_status);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
    if (dp) {                                                         \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
    } else {                                                          \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
    }                                                                 \
    tcg_temp_free_ptr(fpst);                                          \
}

VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2

static inline void gen_vfp_F1_mul(int dp)
{
    /* Like gen_vfp_mul() but put result in F1 */
    TCGv_ptr fpst = get_fpstatus_ptr(0);
    if (dp) {
        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
    } else {
        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
    }
    tcg_temp_free_ptr(fpst);
}

static inline void gen_vfp_F1_neg(int dp)
{
    /* Like gen_vfp_neg() but put result in F1 */
    if (dp) {
        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
    } else {
        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
    }
}

static inline void gen_vfp_abs(int dp)
{
    if (dp)
        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_neg(int dp)
{
    if (dp)
        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_sqrt(int dp)
{
    if (dp)
        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_cmp(int dp)
{
    if (dp)
        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_cmpe(int dp)
{
    if (dp)
        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_F1_ld0(int dp)
{
    if (dp)
        tcg_gen_movi_i64(cpu_F1d, 0);
    else
        tcg_gen_movi_i32(cpu_F1s, 0);
}

#define VFP_GEN_ITOF(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_ITOF(uito)
VFP_GEN_ITOF(sito)
#undef VFP_GEN_ITOF

#define VFP_GEN_FTOI(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_FTOI(toui)
VFP_GEN_FTOI(touiz)
VFP_GEN_FTOI(tosi)
VFP_GEN_FTOI(tosiz)
#undef VFP_GEN_FTOI

#define VFP_GEN_FIX(name, round) \
static inline void gen_vfp_##name(int dp, int shift, int neon) \
{ \
    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
                                        statusptr); \
    } else { \
        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
                                        statusptr); \
    } \
    tcg_temp_free_i32(tmp_shift); \
    tcg_temp_free_ptr(statusptr); \
}
VFP_GEN_FIX(tosh, _round_to_zero)
VFP_GEN_FIX(tosl, _round_to_zero)
VFP_GEN_FIX(touh, _round_to_zero)
VFP_GEN_FIX(toul, _round_to_zero)
VFP_GEN_FIX(shto, )
VFP_GEN_FIX(slto, )
VFP_GEN_FIX(uhto, )
VFP_GEN_FIX(ulto, )
#undef VFP_GEN_FIX

static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
{
    if (dp) {
        gen_aa32_ld64(cpu_F0d, addr, get_mem_index(s));
    } else {
        gen_aa32_ld32u(cpu_F0s, addr, get_mem_index(s));
    }
}

static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
{
    if (dp) {
        gen_aa32_st64(cpu_F0d, addr, get_mem_index(s));
    } else {
        gen_aa32_st32(cpu_F0s, addr, get_mem_index(s));
    }
}

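/* Byte offset of a VFP register within CPUARMState. For dp, reg indexes
 * the D registers directly; otherwise reg is an S register, stored as
 * one half of vfp.regs[reg >> 1] with odd registers in the upper word
 * (CPU_DoubleU abstracts the host endianness).
 */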
static inline long
vfp_reg_offset (int dp, int reg)
{
    if (dp)
        return offsetof(CPUARMState, vfp.regs[reg]);
    else if (reg & 1) {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.upper);
    } else {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.lower);
    }
}

/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long
neon_reg_offset (int reg, int n)
{
    int sreg;
    sreg = reg * 2 + n;
    return vfp_reg_offset(0, sreg);
}

static TCGv_i32 neon_load_reg(int reg, int pass)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
    return tmp;
}

static void neon_store_reg(int reg, int pass, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
    tcg_temp_free_i32(var);
}

static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

static inline void neon_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64

static inline void gen_mov_F0_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_F1_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_vreg_F0(int dp, int reg)
{
    if (dp)
        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

#define ARM_CP_RW_BIT   (1 << 20)

static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline TCGv_i32 iwmmxt_load_creg(int reg)
{
    TCGv_i32 var = tcg_temp_new_i32();
    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    return var;
}

static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    tcg_temp_free_i32(var);
}

static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}

#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}

IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)

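/* wCon bit 1 (MUP) and bit 0 (CUP) record that an insn has updated an
 * iwMMXt data register or control register respectively.
 */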
static void gen_op_iwmmxt_set_mup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 2);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_set_cup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 1);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}

static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}

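/* Compute the effective address of an iwMMXt load/store into dest,
 * handling pre/post-indexing and base-register writeback. Returns
 * nonzero for an unhandled addressing form.
 */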
1511 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1512                                      TCGv_i32 dest)
1513 {
1514     int rd;
1515     uint32_t offset;
1516     TCGv_i32 tmp;
1517
1518     rd = (insn >> 16) & 0xf;
1519     tmp = load_reg(s, rd);
1520
1521     offset = (insn & 0xff) << ((insn >> 7) & 2);
1522     if (insn & (1 << 24)) {
1523         /* Pre indexed */
1524         if (insn & (1 << 23))
1525             tcg_gen_addi_i32(tmp, tmp, offset);
1526         else
1527             tcg_gen_addi_i32(tmp, tmp, -offset);
1528         tcg_gen_mov_i32(dest, tmp);
1529         if (insn & (1 << 21))
1530             store_reg(s, rd, tmp);
1531         else
1532             tcg_temp_free_i32(tmp);
1533     } else if (insn & (1 << 21)) {
1534         /* Post indexed */
1535         tcg_gen_mov_i32(dest, tmp);
1536         if (insn & (1 << 23))
1537             tcg_gen_addi_i32(tmp, tmp, offset);
1538         else
1539             tcg_gen_addi_i32(tmp, tmp, -offset);
1540         store_reg(s, rd, tmp);
1541     } else if (!(insn & (1 << 23)))
1542         return 1;
1543     return 0;
1544 }
1545
1546 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1547 {
1548     int rd = (insn >> 0) & 0xf;
1549     TCGv_i32 tmp;
1550
1551     if (insn & (1 << 8)) {
1552         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1553             return 1;
1554         } else {
1555             tmp = iwmmxt_load_creg(rd);
1556         }
1557     } else {
1558         tmp = tcg_temp_new_i32();
1559         iwmmxt_load_reg(cpu_V0, rd);
1560         tcg_gen_trunc_i64_i32(tmp, cpu_V0);
1561     }
1562     tcg_gen_andi_i32(tmp, tmp, mask);
1563     tcg_gen_mov_i32(dest, tmp);
1564     tcg_temp_free_i32(tmp);
1565     return 0;
1566 }
1567
1568 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1569    (ie. an undefined instruction).  */
1570 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1571 {
1572     int rd, wrd;
1573     int rdhi, rdlo, rd0, rd1, i;
1574     TCGv_i32 addr;
1575     TCGv_i32 tmp, tmp2, tmp3;
1576
1577     if ((insn & 0x0e000e00) == 0x0c000000) {
1578         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1579             wrd = insn & 0xf;
1580             rdlo = (insn >> 12) & 0xf;
1581             rdhi = (insn >> 16) & 0xf;
1582             if (insn & ARM_CP_RW_BIT) {                 /* TMRRC */
1583                 iwmmxt_load_reg(cpu_V0, wrd);
1584                 tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
1585                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1586                 tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
1587             } else {                                    /* TMCRR */
1588                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1589                 iwmmxt_store_reg(cpu_V0, wrd);
1590                 gen_op_iwmmxt_set_mup();
1591             }
1592             return 0;
1593         }
1594
1595         wrd = (insn >> 12) & 0xf;
1596         addr = tcg_temp_new_i32();
1597         if (gen_iwmmxt_address(s, insn, addr)) {
1598             tcg_temp_free_i32(addr);
1599             return 1;
1600         }
1601         if (insn & ARM_CP_RW_BIT) {
1602             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1603                 tmp = tcg_temp_new_i32();
1604                 gen_aa32_ld32u(tmp, addr, get_mem_index(s));
1605                 iwmmxt_store_creg(wrd, tmp);
1606             } else {
1607                 i = 1;
1608                 if (insn & (1 << 8)) {
1609                     if (insn & (1 << 22)) {             /* WLDRD */
1610                         gen_aa32_ld64(cpu_M0, addr, get_mem_index(s));
1611                         i = 0;
1612                     } else {                            /* WLDRW wRd */
1613                         tmp = tcg_temp_new_i32();
1614                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
1615                     }
1616                 } else {
1617                     tmp = tcg_temp_new_i32();
1618                     if (insn & (1 << 22)) {             /* WLDRH */
1619                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
1620                     } else {                            /* WLDRB */
1621                         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
1622                     }
1623                 }
1624                 if (i) {
1625                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1626                     tcg_temp_free_i32(tmp);
1627                 }
1628                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1629             }
1630         } else {
1631             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1632                 tmp = iwmmxt_load_creg(wrd);
1633                 gen_aa32_st32(tmp, addr, get_mem_index(s));
1634             } else {
1635                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1636                 tmp = tcg_temp_new_i32();
1637                 if (insn & (1 << 8)) {
1638                     if (insn & (1 << 22)) {             /* WSTRD */
1639                         gen_aa32_st64(cpu_M0, addr, get_mem_index(s));
1640                     } else {                            /* WSTRW wRd */
1641                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1642                         gen_aa32_st32(tmp, addr, get_mem_index(s));
1643                     }
1644                 } else {
1645                     if (insn & (1 << 22)) {             /* WSTRH */
1646                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1647                         gen_aa32_st16(tmp, addr, get_mem_index(s));
1648                     } else {                            /* WSTRB */
1649                         tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1650                         gen_aa32_st8(tmp, addr, get_mem_index(s));
1651                     }
1652                 }
1653             }
1654             tcg_temp_free_i32(tmp);
1655         }
1656         tcg_temp_free_i32(addr);
1657         return 0;
1658     }
1659
1660     if ((insn & 0x0f000000) != 0x0e000000)
1661         return 1;
1662
1663     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1664     case 0x000:                                         /* WOR */
1665         wrd = (insn >> 12) & 0xf;
1666         rd0 = (insn >> 0) & 0xf;
1667         rd1 = (insn >> 16) & 0xf;
1668         gen_op_iwmmxt_movq_M0_wRn(rd0);
1669         gen_op_iwmmxt_orq_M0_wRn(rd1);
1670         gen_op_iwmmxt_setpsr_nz();
1671         gen_op_iwmmxt_movq_wRn_M0(wrd);
1672         gen_op_iwmmxt_set_mup();
1673         gen_op_iwmmxt_set_cup();
1674         break;
1675     case 0x011:                                         /* TMCR */
1676         if (insn & 0xf)
1677             return 1;
1678         rd = (insn >> 12) & 0xf;
1679         wrd = (insn >> 16) & 0xf;
1680         switch (wrd) {
1681         case ARM_IWMMXT_wCID:
1682         case ARM_IWMMXT_wCASF:
1683             break;
1684         case ARM_IWMMXT_wCon:
1685             gen_op_iwmmxt_set_cup();
1686             /* Fall through.  */
1687         case ARM_IWMMXT_wCSSF:
1688             tmp = iwmmxt_load_creg(wrd);
1689             tmp2 = load_reg(s, rd);
1690             tcg_gen_andc_i32(tmp, tmp, tmp2);
1691             tcg_temp_free_i32(tmp2);
1692             iwmmxt_store_creg(wrd, tmp);
1693             break;
1694         case ARM_IWMMXT_wCGR0:
1695         case ARM_IWMMXT_wCGR1:
1696         case ARM_IWMMXT_wCGR2:
1697         case ARM_IWMMXT_wCGR3:
1698             gen_op_iwmmxt_set_cup();
1699             tmp = load_reg(s, rd);
1700             iwmmxt_store_creg(wrd, tmp);
1701             break;
1702         default:
1703             return 1;
1704         }
1705         break;
1706     case 0x100:                                         /* WXOR */
1707         wrd = (insn >> 12) & 0xf;
1708         rd0 = (insn >> 0) & 0xf;
1709         rd1 = (insn >> 16) & 0xf;
1710         gen_op_iwmmxt_movq_M0_wRn(rd0);
1711         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1712         gen_op_iwmmxt_setpsr_nz();
1713         gen_op_iwmmxt_movq_wRn_M0(wrd);
1714         gen_op_iwmmxt_set_mup();
1715         gen_op_iwmmxt_set_cup();
1716         break;
1717     case 0x111:                                         /* TMRC */
1718         if (insn & 0xf)
1719             return 1;
1720         rd = (insn >> 12) & 0xf;
1721         wrd = (insn >> 16) & 0xf;
1722         tmp = iwmmxt_load_creg(wrd);
1723         store_reg(s, rd, tmp);
1724         break;
1725     case 0x300:                                         /* WANDN */
1726         wrd = (insn >> 12) & 0xf;
1727         rd0 = (insn >> 0) & 0xf;
1728         rd1 = (insn >> 16) & 0xf;
1729         gen_op_iwmmxt_movq_M0_wRn(rd0);
1730         tcg_gen_not_i64(cpu_M0, cpu_M0);        /* WANDN is wRn & ~wRm: bitwise complement, not negation */
1731         gen_op_iwmmxt_andq_M0_wRn(rd1);
1732         gen_op_iwmmxt_setpsr_nz();
1733         gen_op_iwmmxt_movq_wRn_M0(wrd);
1734         gen_op_iwmmxt_set_mup();
1735         gen_op_iwmmxt_set_cup();
1736         break;
1737     case 0x200:                                         /* WAND */
1738         wrd = (insn >> 12) & 0xf;
1739         rd0 = (insn >> 0) & 0xf;
1740         rd1 = (insn >> 16) & 0xf;
1741         gen_op_iwmmxt_movq_M0_wRn(rd0);
1742         gen_op_iwmmxt_andq_M0_wRn(rd1);
1743         gen_op_iwmmxt_setpsr_nz();
1744         gen_op_iwmmxt_movq_wRn_M0(wrd);
1745         gen_op_iwmmxt_set_mup();
1746         gen_op_iwmmxt_set_cup();
1747         break;
1748     case 0x810: case 0xa10:                             /* WMADD */
1749         wrd = (insn >> 12) & 0xf;
1750         rd0 = (insn >> 0) & 0xf;
1751         rd1 = (insn >> 16) & 0xf;
1752         gen_op_iwmmxt_movq_M0_wRn(rd0);
1753         if (insn & (1 << 21))
1754             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1755         else
1756             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1757         gen_op_iwmmxt_movq_wRn_M0(wrd);
1758         gen_op_iwmmxt_set_mup();
1759         break;
1760     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1761         wrd = (insn >> 12) & 0xf;
1762         rd0 = (insn >> 16) & 0xf;
1763         rd1 = (insn >> 0) & 0xf;
1764         gen_op_iwmmxt_movq_M0_wRn(rd0);
1765         switch ((insn >> 22) & 3) {
1766         case 0:
1767             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1768             break;
1769         case 1:
1770             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1771             break;
1772         case 2:
1773             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1774             break;
1775         case 3:
1776             return 1;
1777         }
1778         gen_op_iwmmxt_movq_wRn_M0(wrd);
1779         gen_op_iwmmxt_set_mup();
1780         gen_op_iwmmxt_set_cup();
1781         break;
1782     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1783         wrd = (insn >> 12) & 0xf;
1784         rd0 = (insn >> 16) & 0xf;
1785         rd1 = (insn >> 0) & 0xf;
1786         gen_op_iwmmxt_movq_M0_wRn(rd0);
1787         switch ((insn >> 22) & 3) {
1788         case 0:
1789             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1790             break;
1791         case 1:
1792             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1793             break;
1794         case 2:
1795             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1796             break;
1797         case 3:
1798             return 1;
1799         }
1800         gen_op_iwmmxt_movq_wRn_M0(wrd);
1801         gen_op_iwmmxt_set_mup();
1802         gen_op_iwmmxt_set_cup();
1803         break;
1804     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1805         wrd = (insn >> 12) & 0xf;
1806         rd0 = (insn >> 16) & 0xf;
1807         rd1 = (insn >> 0) & 0xf;
1808         gen_op_iwmmxt_movq_M0_wRn(rd0);
1809         if (insn & (1 << 22))
1810             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1811         else
1812             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1813         if (!(insn & (1 << 20)))
1814             gen_op_iwmmxt_addl_M0_wRn(wrd);
1815         gen_op_iwmmxt_movq_wRn_M0(wrd);
1816         gen_op_iwmmxt_set_mup();
1817         break;
1818     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1819         wrd = (insn >> 12) & 0xf;
1820         rd0 = (insn >> 16) & 0xf;
1821         rd1 = (insn >> 0) & 0xf;
1822         gen_op_iwmmxt_movq_M0_wRn(rd0);
1823         if (insn & (1 << 21)) {
1824             if (insn & (1 << 20))
1825                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1826             else
1827                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1828         } else {
1829             if (insn & (1 << 20))
1830                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1831             else
1832                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1833         }
1834         gen_op_iwmmxt_movq_wRn_M0(wrd);
1835         gen_op_iwmmxt_set_mup();
1836         break;
1837     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1838         wrd = (insn >> 12) & 0xf;
1839         rd0 = (insn >> 16) & 0xf;
1840         rd1 = (insn >> 0) & 0xf;
1841         gen_op_iwmmxt_movq_M0_wRn(rd0);
1842         if (insn & (1 << 21))
1843             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1844         else
1845             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1846         if (!(insn & (1 << 20))) {
1847             iwmmxt_load_reg(cpu_V1, wrd);
1848             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1849         }
1850         gen_op_iwmmxt_movq_wRn_M0(wrd);
1851         gen_op_iwmmxt_set_mup();
1852         break;
1853     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1854         wrd = (insn >> 12) & 0xf;
1855         rd0 = (insn >> 16) & 0xf;
1856         rd1 = (insn >> 0) & 0xf;
1857         gen_op_iwmmxt_movq_M0_wRn(rd0);
1858         switch ((insn >> 22) & 3) {
1859         case 0:
1860             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1861             break;
1862         case 1:
1863             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1864             break;
1865         case 2:
1866             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1867             break;
1868         case 3:
1869             return 1;
1870         }
1871         gen_op_iwmmxt_movq_wRn_M0(wrd);
1872         gen_op_iwmmxt_set_mup();
1873         gen_op_iwmmxt_set_cup();
1874         break;
1875     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1876         wrd = (insn >> 12) & 0xf;
1877         rd0 = (insn >> 16) & 0xf;
1878         rd1 = (insn >> 0) & 0xf;
1879         gen_op_iwmmxt_movq_M0_wRn(rd0);
1880         if (insn & (1 << 22)) {
1881             if (insn & (1 << 20))
1882                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1883             else
1884                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1885         } else {
1886             if (insn & (1 << 20))
1887                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1888             else
1889                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1890         }
1891         gen_op_iwmmxt_movq_wRn_M0(wrd);
1892         gen_op_iwmmxt_set_mup();
1893         gen_op_iwmmxt_set_cup();
1894         break;
1895     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1896         wrd = (insn >> 12) & 0xf;
1897         rd0 = (insn >> 16) & 0xf;
1898         rd1 = (insn >> 0) & 0xf;
1899         gen_op_iwmmxt_movq_M0_wRn(rd0);
1900         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
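             /* wCGRn supplies the WALIGNR extraction offset; only its low
              * 3 bits are used (a byte offset of 0..7 into the 128-bit
              * wRn:wRm pair).
              */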
1901         tcg_gen_andi_i32(tmp, tmp, 7);
1902         iwmmxt_load_reg(cpu_V1, rd1);
1903         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1904         tcg_temp_free_i32(tmp);
1905         gen_op_iwmmxt_movq_wRn_M0(wrd);
1906         gen_op_iwmmxt_set_mup();
1907         break;
1908     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1909         if (((insn >> 6) & 3) == 3)
1910             return 1;
1911         rd = (insn >> 12) & 0xf;
1912         wrd = (insn >> 16) & 0xf;
1913         tmp = load_reg(s, rd);
1914         gen_op_iwmmxt_movq_M0_wRn(wrd);
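             /* Build the element mask (tmp2) and bit offset (tmp3) for the
              * insert; e.g. a byte insert into element 5 uses mask 0xff at
              * bit offset 5 * 8 == 40.
              */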
1915         switch ((insn >> 6) & 3) {
1916         case 0:
1917             tmp2 = tcg_const_i32(0xff);
1918             tmp3 = tcg_const_i32((insn & 7) << 3);
1919             break;
1920         case 1:
1921             tmp2 = tcg_const_i32(0xffff);
1922             tmp3 = tcg_const_i32((insn & 3) << 4);
1923             break;
1924         case 2:
1925             tmp2 = tcg_const_i32(0xffffffff);
1926             tmp3 = tcg_const_i32((insn & 1) << 5);
1927             break;
1928         default:
1929             TCGV_UNUSED_I32(tmp2);
1930             TCGV_UNUSED_I32(tmp3);
1931         }
1932         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1933         tcg_temp_free_i32(tmp3);
1934         tcg_temp_free_i32(tmp2);
1935         tcg_temp_free_i32(tmp);
1936         gen_op_iwmmxt_movq_wRn_M0(wrd);
1937         gen_op_iwmmxt_set_mup();
1938         break;
1939     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1940         rd = (insn >> 12) & 0xf;
1941         wrd = (insn >> 16) & 0xf;
1942         if (rd == 15 || ((insn >> 22) & 3) == 3)
1943             return 1;
1944         gen_op_iwmmxt_movq_M0_wRn(wrd);
1945         tmp = tcg_temp_new_i32();
1946         switch ((insn >> 22) & 3) {
1947         case 0:
1948             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1949             tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1950             if (insn & 8) {
1951                 tcg_gen_ext8s_i32(tmp, tmp);
1952             } else {
1953                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1954             }
1955             break;
1956         case 1:
1957             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1958             tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1959             if (insn & 8) {
1960                 tcg_gen_ext16s_i32(tmp, tmp);
1961             } else {
1962                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1963             }
1964             break;
1965         case 2:
1966             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1967             tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1968             break;
1969         }
1970         store_reg(s, rd, tmp);
1971         break;
1972     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1973         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1974             return 1;
1975         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1976         switch ((insn >> 22) & 3) {
1977         case 0:
1978             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1979             break;
1980         case 1:
1981             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1982             break;
1983         case 2:
1984             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1985             break;
1986         }
1987         tcg_gen_shli_i32(tmp, tmp, 28);
1988         gen_set_nzcv(tmp);
1989         tcg_temp_free_i32(tmp);
1990         break;
1991     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1992         if (((insn >> 6) & 3) == 3)
1993             return 1;
1994         rd = (insn >> 12) & 0xf;
1995         wrd = (insn >> 16) & 0xf;
1996         tmp = load_reg(s, rd);
1997         switch ((insn >> 6) & 3) {
1998         case 0:
1999             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2000             break;
2001         case 1:
2002             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2003             break;
2004         case 2:
2005             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2006             break;
2007         }
2008         tcg_temp_free_i32(tmp);
2009         gen_op_iwmmxt_movq_wRn_M0(wrd);
2010         gen_op_iwmmxt_set_mup();
2011         break;
2012     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
2013         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2014             return 1;
2015         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2016         tmp2 = tcg_temp_new_i32();
2017         tcg_gen_mov_i32(tmp2, tmp);
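             /* Fold the per-element wCASF flag fields together with AND so
              * that the top nibble of tmp is set only where the flag is set
              * for every element; gen_set_nzcv() below consumes that nibble.
              */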
2018         switch ((insn >> 22) & 3) {
2019         case 0:
2020             for (i = 0; i < 7; i++) {
2021                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2022                 tcg_gen_and_i32(tmp, tmp, tmp2);
2023             }
2024             break;
2025         case 1:
2026             for (i = 0; i < 3; i++) {
2027                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2028                 tcg_gen_and_i32(tmp, tmp, tmp2);
2029             }
2030             break;
2031         case 2:
2032             tcg_gen_shli_i32(tmp2, tmp2, 16);
2033             tcg_gen_and_i32(tmp, tmp, tmp2);
2034             break;
2035         }
2036         gen_set_nzcv(tmp);
2037         tcg_temp_free_i32(tmp2);
2038         tcg_temp_free_i32(tmp);
2039         break;
2040     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2041         wrd = (insn >> 12) & 0xf;
2042         rd0 = (insn >> 16) & 0xf;
2043         gen_op_iwmmxt_movq_M0_wRn(rd0);
2044         switch ((insn >> 22) & 3) {
2045         case 0:
2046             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2047             break;
2048         case 1:
2049             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2050             break;
2051         case 2:
2052             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2053             break;
2054         case 3:
2055             return 1;
2056         }
2057         gen_op_iwmmxt_movq_wRn_M0(wrd);
2058         gen_op_iwmmxt_set_mup();
2059         break;
2060     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2061         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2062             return 1;
2063         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2064         tmp2 = tcg_temp_new_i32();
2065         tcg_gen_mov_i32(tmp2, tmp);
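             /* As for TANDC above, but fold with OR: the top nibble of tmp
              * is set where the flag is set for any element.
              */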
2066         switch ((insn >> 22) & 3) {
2067         case 0:
2068             for (i = 0; i < 7; i++) {
2069                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2070                 tcg_gen_or_i32(tmp, tmp, tmp2);
2071             }
2072             break;
2073         case 1:
2074             for (i = 0; i < 3; i++) {
2075                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2076                 tcg_gen_or_i32(tmp, tmp, tmp2);
2077             }
2078             break;
2079         case 2:
2080             tcg_gen_shli_i32(tmp2, tmp2, 16);
2081             tcg_gen_or_i32(tmp, tmp, tmp2);
2082             break;
2083         }
2084         gen_set_nzcv(tmp);
2085         tcg_temp_free_i32(tmp2);
2086         tcg_temp_free_i32(tmp);
2087         break;
2088     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2089         rd = (insn >> 12) & 0xf;
2090         rd0 = (insn >> 16) & 0xf;
2091         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2092             return 1;
2093         gen_op_iwmmxt_movq_M0_wRn(rd0);
2094         tmp = tcg_temp_new_i32();
2095         switch ((insn >> 22) & 3) {
2096         case 0:
2097             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2098             break;
2099         case 1:
2100             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2101             break;
2102         case 2:
2103             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2104             break;
2105         }
2106         store_reg(s, rd, tmp);
2107         break;
2108     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2109     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2110         wrd = (insn >> 12) & 0xf;
2111         rd0 = (insn >> 16) & 0xf;
2112         rd1 = (insn >> 0) & 0xf;
2113         gen_op_iwmmxt_movq_M0_wRn(rd0);
2114         switch ((insn >> 22) & 3) {
2115         case 0:
2116             if (insn & (1 << 21))
2117                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2118             else
2119                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2120             break;
2121         case 1:
2122             if (insn & (1 << 21))
2123                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2124             else
2125                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2126             break;
2127         case 2:
2128             if (insn & (1 << 21))
2129                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2130             else
2131                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2132             break;
2133         case 3:
2134             return 1;
2135         }
2136         gen_op_iwmmxt_movq_wRn_M0(wrd);
2137         gen_op_iwmmxt_set_mup();
2138         gen_op_iwmmxt_set_cup();
2139         break;
2140     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2141     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2142         wrd = (insn >> 12) & 0xf;
2143         rd0 = (insn >> 16) & 0xf;
2144         gen_op_iwmmxt_movq_M0_wRn(rd0);
2145         switch ((insn >> 22) & 3) {
2146         case 0:
2147             if (insn & (1 << 21))
2148                 gen_op_iwmmxt_unpacklsb_M0();
2149             else
2150                 gen_op_iwmmxt_unpacklub_M0();
2151             break;
2152         case 1:
2153             if (insn & (1 << 21))
2154                 gen_op_iwmmxt_unpacklsw_M0();
2155             else
2156                 gen_op_iwmmxt_unpackluw_M0();
2157             break;
2158         case 2:
2159             if (insn & (1 << 21))
2160                 gen_op_iwmmxt_unpacklsl_M0();
2161             else
2162                 gen_op_iwmmxt_unpacklul_M0();
2163             break;
2164         case 3:
2165             return 1;
2166         }
2167         gen_op_iwmmxt_movq_wRn_M0(wrd);
2168         gen_op_iwmmxt_set_mup();
2169         gen_op_iwmmxt_set_cup();
2170         break;
2171     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2172     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2173         wrd = (insn >> 12) & 0xf;
2174         rd0 = (insn >> 16) & 0xf;
2175         gen_op_iwmmxt_movq_M0_wRn(rd0);
2176         switch ((insn >> 22) & 3) {
2177         case 0:
2178             if (insn & (1 << 21))
2179                 gen_op_iwmmxt_unpackhsb_M0();
2180             else
2181                 gen_op_iwmmxt_unpackhub_M0();
2182             break;
2183         case 1:
2184             if (insn & (1 << 21))
2185                 gen_op_iwmmxt_unpackhsw_M0();
2186             else
2187                 gen_op_iwmmxt_unpackhuw_M0();
2188             break;
2189         case 2:
2190             if (insn & (1 << 21))
2191                 gen_op_iwmmxt_unpackhsl_M0();
2192             else
2193                 gen_op_iwmmxt_unpackhul_M0();
2194             break;
2195         case 3:
2196             return 1;
2197         }
2198         gen_op_iwmmxt_movq_wRn_M0(wrd);
2199         gen_op_iwmmxt_set_mup();
2200         gen_op_iwmmxt_set_cup();
2201         break;
2202     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2203     case 0x214: case 0x614: case 0xa14: case 0xe14:
2204         if (((insn >> 22) & 3) == 0)
2205             return 1;
2206         wrd = (insn >> 12) & 0xf;
2207         rd0 = (insn >> 16) & 0xf;
2208         gen_op_iwmmxt_movq_M0_wRn(rd0);
2209         tmp = tcg_temp_new_i32();
2210         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2211             tcg_temp_free_i32(tmp);
2212             return 1;
2213         }
2214         switch ((insn >> 22) & 3) {
2215         case 1:
2216             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2217             break;
2218         case 2:
2219             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2220             break;
2221         case 3:
2222             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2223             break;
2224         }
2225         tcg_temp_free_i32(tmp);
2226         gen_op_iwmmxt_movq_wRn_M0(wrd);
2227         gen_op_iwmmxt_set_mup();
2228         gen_op_iwmmxt_set_cup();
2229         break;
2230     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2231     case 0x014: case 0x414: case 0x814: case 0xc14:
2232         if (((insn >> 22) & 3) == 0)
2233             return 1;
2234         wrd = (insn >> 12) & 0xf;
2235         rd0 = (insn >> 16) & 0xf;
2236         gen_op_iwmmxt_movq_M0_wRn(rd0);
2237         tmp = tcg_temp_new_i32();
2238         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2239             tcg_temp_free_i32(tmp);
2240             return 1;
2241         }
2242         switch ((insn >> 22) & 3) {
2243         case 1:
2244             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2245             break;
2246         case 2:
2247             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2248             break;
2249         case 3:
2250             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2251             break;
2252         }
2253         tcg_temp_free_i32(tmp);
2254         gen_op_iwmmxt_movq_wRn_M0(wrd);
2255         gen_op_iwmmxt_set_mup();
2256         gen_op_iwmmxt_set_cup();
2257         break;
2258     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2259     case 0x114: case 0x514: case 0x914: case 0xd14:
2260         if (((insn >> 22) & 3) == 0)
2261             return 1;
2262         wrd = (insn >> 12) & 0xf;
2263         rd0 = (insn >> 16) & 0xf;
2264         gen_op_iwmmxt_movq_M0_wRn(rd0);
2265         tmp = tcg_temp_new_i32();
2266         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2267             tcg_temp_free_i32(tmp);
2268             return 1;
2269         }
2270         switch ((insn >> 22) & 3) {
2271         case 1:
2272             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2273             break;
2274         case 2:
2275             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2276             break;
2277         case 3:
2278             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2279             break;
2280         }
2281         tcg_temp_free_i32(tmp);
2282         gen_op_iwmmxt_movq_wRn_M0(wrd);
2283         gen_op_iwmmxt_set_mup();
2284         gen_op_iwmmxt_set_cup();
2285         break;
2286     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2287     case 0x314: case 0x714: case 0xb14: case 0xf14:
2288         if (((insn >> 22) & 3) == 0)
2289             return 1;
2290         wrd = (insn >> 12) & 0xf;
2291         rd0 = (insn >> 16) & 0xf;
2292         gen_op_iwmmxt_movq_M0_wRn(rd0);
2293         tmp = tcg_temp_new_i32();
2294         switch ((insn >> 22) & 3) {
2295         case 1:
2296             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2297                 tcg_temp_free_i32(tmp);
2298                 return 1;
2299             }
2300             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2301             break;
2302         case 2:
2303             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2304                 tcg_temp_free_i32(tmp);
2305                 return 1;
2306             }
2307             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2308             break;
2309         case 3:
2310             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2311                 tcg_temp_free_i32(tmp);
2312                 return 1;
2313             }
2314             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2315             break;
2316         }
2317         tcg_temp_free_i32(tmp);
2318         gen_op_iwmmxt_movq_wRn_M0(wrd);
2319         gen_op_iwmmxt_set_mup();
2320         gen_op_iwmmxt_set_cup();
2321         break;
2322     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2323     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2324         wrd = (insn >> 12) & 0xf;
2325         rd0 = (insn >> 16) & 0xf;
2326         rd1 = (insn >> 0) & 0xf;
2327         gen_op_iwmmxt_movq_M0_wRn(rd0);
2328         switch ((insn >> 22) & 3) {
2329         case 0:
2330             if (insn & (1 << 21))
2331                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2332             else
2333                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2334             break;
2335         case 1:
2336             if (insn & (1 << 21))
2337                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2338             else
2339                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2340             break;
2341         case 2:
2342             if (insn & (1 << 21))
2343                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2344             else
2345                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2346             break;
2347         case 3:
2348             return 1;
2349         }
2350         gen_op_iwmmxt_movq_wRn_M0(wrd);
2351         gen_op_iwmmxt_set_mup();
2352         break;
2353     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2354     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2355         wrd = (insn >> 12) & 0xf;
2356         rd0 = (insn >> 16) & 0xf;
2357         rd1 = (insn >> 0) & 0xf;
2358         gen_op_iwmmxt_movq_M0_wRn(rd0);
2359         switch ((insn >> 22) & 3) {
2360         case 0:
2361             if (insn & (1 << 21))
2362                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2363             else
2364                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2365             break;
2366         case 1:
2367             if (insn & (1 << 21))
2368                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2369             else
2370                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2371             break;
2372         case 2:
2373             if (insn & (1 << 21))
2374                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2375             else
2376                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2377             break;
2378         case 3:
2379             return 1;
2380         }
2381         gen_op_iwmmxt_movq_wRn_M0(wrd);
2382         gen_op_iwmmxt_set_mup();
2383         break;
2384     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2385     case 0x402: case 0x502: case 0x602: case 0x702:
2386         wrd = (insn >> 12) & 0xf;
2387         rd0 = (insn >> 16) & 0xf;
2388         rd1 = (insn >> 0) & 0xf;
2389         gen_op_iwmmxt_movq_M0_wRn(rd0);
2390         tmp = tcg_const_i32((insn >> 20) & 3);
2391         iwmmxt_load_reg(cpu_V1, rd1);
2392         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2393         tcg_temp_free_i32(tmp);
2394         gen_op_iwmmxt_movq_wRn_M0(wrd);
2395         gen_op_iwmmxt_set_mup();
2396         break;
2397     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2398     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2399     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2400     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2401         wrd = (insn >> 12) & 0xf;
2402         rd0 = (insn >> 16) & 0xf;
2403         rd1 = (insn >> 0) & 0xf;
2404         gen_op_iwmmxt_movq_M0_wRn(rd0);
2405         switch ((insn >> 20) & 0xf) {
2406         case 0x0:
2407             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2408             break;
2409         case 0x1:
2410             gen_op_iwmmxt_subub_M0_wRn(rd1);
2411             break;
2412         case 0x3:
2413             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2414             break;
2415         case 0x4:
2416             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2417             break;
2418         case 0x5:
2419             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2420             break;
2421         case 0x7:
2422             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2423             break;
2424         case 0x8:
2425             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2426             break;
2427         case 0x9:
2428             gen_op_iwmmxt_subul_M0_wRn(rd1);
2429             break;
2430         case 0xb:
2431             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2432             break;
2433         default:
2434             return 1;
2435         }
2436         gen_op_iwmmxt_movq_wRn_M0(wrd);
2437         gen_op_iwmmxt_set_mup();
2438         gen_op_iwmmxt_set_cup();
2439         break;
2440     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2441     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2442     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2443     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2444         wrd = (insn >> 12) & 0xf;
2445         rd0 = (insn >> 16) & 0xf;
2446         gen_op_iwmmxt_movq_M0_wRn(rd0);
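             /* The 8-bit WSHUFH immediate is split across the encoding:
              * high nibble in insn[23:20], low nibble in insn[3:0].
              */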
2447         tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2448         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2449         tcg_temp_free_i32(tmp);
2450         gen_op_iwmmxt_movq_wRn_M0(wrd);
2451         gen_op_iwmmxt_set_mup();
2452         gen_op_iwmmxt_set_cup();
2453         break;
2454     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2455     case 0x418: case 0x518: case 0x618: case 0x718:
2456     case 0x818: case 0x918: case 0xa18: case 0xb18:
2457     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2458         wrd = (insn >> 12) & 0xf;
2459         rd0 = (insn >> 16) & 0xf;
2460         rd1 = (insn >> 0) & 0xf;
2461         gen_op_iwmmxt_movq_M0_wRn(rd0);
2462         switch ((insn >> 20) & 0xf) {
2463         case 0x0:
2464             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2465             break;
2466         case 0x1:
2467             gen_op_iwmmxt_addub_M0_wRn(rd1);
2468             break;
2469         case 0x3:
2470             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2471             break;
2472         case 0x4:
2473             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2474             break;
2475         case 0x5:
2476             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2477             break;
2478         case 0x7:
2479             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2480             break;
2481         case 0x8:
2482             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2483             break;
2484         case 0x9:
2485             gen_op_iwmmxt_addul_M0_wRn(rd1);
2486             break;
2487         case 0xb:
2488             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2489             break;
2490         default:
2491             return 1;
2492         }
2493         gen_op_iwmmxt_movq_wRn_M0(wrd);
2494         gen_op_iwmmxt_set_mup();
2495         gen_op_iwmmxt_set_cup();
2496         break;
2497     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2498     case 0x408: case 0x508: case 0x608: case 0x708:
2499     case 0x808: case 0x908: case 0xa08: case 0xb08:
2500     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2501         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2502             return 1;
2503         wrd = (insn >> 12) & 0xf;
2504         rd0 = (insn >> 16) & 0xf;
2505         rd1 = (insn >> 0) & 0xf;
2506         gen_op_iwmmxt_movq_M0_wRn(rd0);
2507         switch ((insn >> 22) & 3) {
2508         case 1:
2509             if (insn & (1 << 21))
2510                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2511             else
2512                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2513             break;
2514         case 2:
2515             if (insn & (1 << 21))
2516                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2517             else
2518                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2519             break;
2520         case 3:
2521             if (insn & (1 << 21))
2522                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2523             else
2524                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2525             break;
2526         }
2527         gen_op_iwmmxt_movq_wRn_M0(wrd);
2528         gen_op_iwmmxt_set_mup();
2529         gen_op_iwmmxt_set_cup();
2530         break;
2531     case 0x201: case 0x203: case 0x205: case 0x207:
2532     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2533     case 0x211: case 0x213: case 0x215: case 0x217:
2534     case 0x219: case 0x21b: case 0x21d: case 0x21f:
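             /* TMIA, TMIAPH, TMIABB/BT/TB/TT: multiply two ARM core
              * registers and accumulate into the 64-bit wRd.
              */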
2535         wrd = (insn >> 5) & 0xf;
2536         rd0 = (insn >> 12) & 0xf;
2537         rd1 = (insn >> 0) & 0xf;
2538         if (rd0 == 0xf || rd1 == 0xf)
2539             return 1;
2540         gen_op_iwmmxt_movq_M0_wRn(wrd);
2541         tmp = load_reg(s, rd0);
2542         tmp2 = load_reg(s, rd1);
2543         switch ((insn >> 16) & 0xf) {
2544         case 0x0:                                       /* TMIA */
2545             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2546             break;
2547         case 0x8:                                       /* TMIAPH */
2548             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2549             break;
2550         case 0xc: case 0xd: case 0xe: case 0xf:         /* TMIAxy */
2551             if (insn & (1 << 16))
2552                 tcg_gen_shri_i32(tmp, tmp, 16);
2553             if (insn & (1 << 17))
2554                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2555             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2556             break;
2557         default:
2558             tcg_temp_free_i32(tmp2);
2559             tcg_temp_free_i32(tmp);
2560             return 1;
2561         }
2562         tcg_temp_free_i32(tmp2);
2563         tcg_temp_free_i32(tmp);
2564         gen_op_iwmmxt_movq_wRn_M0(wrd);
2565         gen_op_iwmmxt_set_mup();
2566         break;
2567     default:
2568         return 1;
2569     }
2570
2571     return 0;
2572 }
2573
2574 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2575    (i.e. an undefined instruction).  */
2576 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2577 {
2578     int acc, rd0, rd1, rdhi, rdlo;
2579     TCGv_i32 tmp, tmp2;
2580
2581     if ((insn & 0x0ff00f10) == 0x0e200010) {
2582         /* Multiply with Internal Accumulate Format */
2583         rd0 = (insn >> 12) & 0xf;
2584         rd1 = insn & 0xf;
2585         acc = (insn >> 5) & 7;
2586
2587         if (acc != 0)
2588             return 1;
2589
2590         tmp = load_reg(s, rd0);
2591         tmp2 = load_reg(s, rd1);
2592         switch ((insn >> 16) & 0xf) {
2593         case 0x0:                                       /* MIA */
2594             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2595             break;
2596         case 0x8:                                       /* MIAPH */
2597             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2598             break;
2599         case 0xc:                                       /* MIABB */
2600         case 0xd:                                       /* MIABT */
2601         case 0xe:                                       /* MIATB */
2602         case 0xf:                                       /* MIATT */
2603             if (insn & (1 << 16))
2604                 tcg_gen_shri_i32(tmp, tmp, 16);
2605             if (insn & (1 << 17))
2606                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2607             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2608             break;
2609         default:
2610             return 1;
2611         }
2612         tcg_temp_free_i32(tmp2);
2613         tcg_temp_free_i32(tmp);
2614
2615         gen_op_iwmmxt_movq_wRn_M0(acc);
2616         return 0;
2617     }
2618
2619     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2620         /* Internal Accumulator Access Format */
2621         rdhi = (insn >> 16) & 0xf;
2622         rdlo = (insn >> 12) & 0xf;
2623         acc = insn & 7;
2624
2625         if (acc != 0)
2626             return 1;
2627
2628         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2629             iwmmxt_load_reg(cpu_V0, acc);
2630             tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
2631             tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2632             tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
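                 /* acc0 is only 40 bits wide, so keep just bits 39:32 in rdhi */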
2633             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2634         } else {                                        /* MAR */
2635             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2636             iwmmxt_store_reg(cpu_V0, acc);
2637         }
2638         return 0;
2639     }
2640
2641     return 1;
2642 }
2643
2644 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2645 #define VFP_SREG(insn, bigbit, smallbit) \
2646   ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2647 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2648     if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2649         reg = (((insn) >> (bigbit)) & 0x0f) \
2650               | (((insn) >> ((smallbit) - 4)) & 0x10); \
2651     } else { \
2652         if (insn & (1 << (smallbit))) \
2653             return 1; \
2654         reg = ((insn) >> (bigbit)) & 0x0f; \
2655     }} while (0)
2656
2657 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2658 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2659 #define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
2660 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2661 #define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
2662 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
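
     /* A worked example of the decoding above (values are illustrative
      * only): for a double-precision Vd the 4-bit field sits in insn[15:12]
      * and the D bit in insn[22], so Vd == 3 with D == 1 gives
      * VFP_DREG_D == 3 | (1 << 4) == 19 (d19) on a VFP3 core, while a
      * pre-VFP3 core (16 doubles only) takes the "return 1" (UNDEF) path
      * because the D bit is set.  For single precision the fields combine
      * the other way round: VFP_SREG_D == (3 << 1) | 1 == 7 (s7).
      */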
2663
2664 /* Move between integer and VFP cores.  */
2665 static TCGv_i32 gen_vfp_mrs(void)
2666 {
2667     TCGv_i32 tmp = tcg_temp_new_i32();
2668     tcg_gen_mov_i32(tmp, cpu_F0s);
2669     return tmp;
2670 }
2671
2672 static void gen_vfp_msr(TCGv_i32 tmp)
2673 {
2674     tcg_gen_mov_i32(cpu_F0s, tmp);
2675     tcg_temp_free_i32(tmp);
2676 }
2677
2678 static void gen_neon_dup_u8(TCGv_i32 var, int shift)
2679 {
2680     TCGv_i32 tmp = tcg_temp_new_i32();
2681     if (shift)
2682         tcg_gen_shri_i32(var, var, shift);
2683     tcg_gen_ext8u_i32(var, var);
2684     tcg_gen_shli_i32(tmp, var, 8);
2685     tcg_gen_or_i32(var, var, tmp);
2686     tcg_gen_shli_i32(tmp, var, 16);
2687     tcg_gen_or_i32(var, var, tmp);
2688     tcg_temp_free_i32(tmp);
2689 }
2690
2691 static void gen_neon_dup_low16(TCGv_i32 var)
2692 {
2693     TCGv_i32 tmp = tcg_temp_new_i32();
2694     tcg_gen_ext16u_i32(var, var);
2695     tcg_gen_shli_i32(tmp, var, 16);
2696     tcg_gen_or_i32(var, var, tmp);
2697     tcg_temp_free_i32(tmp);
2698 }
2699
2700 static void gen_neon_dup_high16(TCGv_i32 var)
2701 {
2702     TCGv_i32 tmp = tcg_temp_new_i32();
2703     tcg_gen_andi_i32(var, var, 0xffff0000);
2704     tcg_gen_shri_i32(tmp, var, 16);
2705     tcg_gen_or_i32(var, var, tmp);
2706     tcg_temp_free_i32(tmp);
2707 }
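
     /* Quick sanity check of the replication helpers above (illustrative
      * values): gen_neon_dup_u8(var, 0) turns 0x000000ab into 0xabababab,
      * gen_neon_dup_low16() turns 0x0000abcd into 0xabcdabcd, and
      * gen_neon_dup_high16() turns 0xabcd0000 into 0xabcdabcd.
      */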
2708
2709 static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
2710 {
2711     /* Load a single Neon element and replicate into a 32-bit TCG reg */
2712     TCGv_i32 tmp = tcg_temp_new_i32();
2713     switch (size) {
2714     case 0:
2715         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
2716         gen_neon_dup_u8(tmp, 0);
2717         break;
2718     case 1:
2719         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
2720         gen_neon_dup_low16(tmp);
2721         break;
2722     case 2:
2723         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
2724         break;
2725     default: /* Avoid compiler warnings.  */
2726         abort();
2727     }
2728     return tmp;
2729 }
2730
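     /* VSEL: dest = cond ? frn : frm, computed with movcond on the cached
      * flag variables rather than with a branch.  The two cc bits encode
      * the condition: 0 -> EQ, 1 -> VS, 2 -> GE, 3 -> GT; these are the
      * only four conditions the instruction can encode.
      */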
2731 static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
2732                        uint32_t dp)
2733 {
2734     uint32_t cc = extract32(insn, 20, 2);
2735
2736     if (dp) {
2737         TCGv_i64 frn, frm, dest;
2738         TCGv_i64 tmp, zero, zf, nf, vf;
2739
2740         zero = tcg_const_i64(0);
2741
2742         frn = tcg_temp_new_i64();
2743         frm = tcg_temp_new_i64();
2744         dest = tcg_temp_new_i64();
2745
2746         zf = tcg_temp_new_i64();
2747         nf = tcg_temp_new_i64();
2748         vf = tcg_temp_new_i64();
2749
2750         tcg_gen_extu_i32_i64(zf, cpu_ZF);
2751         tcg_gen_ext_i32_i64(nf, cpu_NF);
2752         tcg_gen_ext_i32_i64(vf, cpu_VF);
2753
2754         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2755         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2756         switch (cc) {
2757         case 0: /* eq: Z */
2758             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
2759                                 frn, frm);
2760             break;
2761         case 1: /* vs: V */
2762             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
2763                                 frn, frm);
2764             break;
2765         case 2: /* ge: N == V -> N ^ V == 0 */
2766             tmp = tcg_temp_new_i64();
2767             tcg_gen_xor_i64(tmp, vf, nf);
2768             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
2769                                 frn, frm);
2770             tcg_temp_free_i64(tmp);
2771             break;
2772         case 3: /* gt: !Z && N == V */
2773             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
2774                                 frn, frm);
2775             tmp = tcg_temp_new_i64();
2776             tcg_gen_xor_i64(tmp, vf, nf);
2777             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
2778                                 dest, frm);
2779             tcg_temp_free_i64(tmp);
2780             break;
2781         }
2782         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2783         tcg_temp_free_i64(frn);
2784         tcg_temp_free_i64(frm);
2785         tcg_temp_free_i64(dest);
2786
2787         tcg_temp_free_i64(zf);
2788         tcg_temp_free_i64(nf);
2789         tcg_temp_free_i64(vf);
2790
2791         tcg_temp_free_i64(zero);
2792     } else {
2793         TCGv_i32 frn, frm, dest;
2794         TCGv_i32 tmp, zero;
2795
2796         zero = tcg_const_i32(0);
2797
2798         frn = tcg_temp_new_i32();
2799         frm = tcg_temp_new_i32();
2800         dest = tcg_temp_new_i32();
2801         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2802         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2803         switch (cc) {
2804         case 0: /* eq: Z */
2805             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
2806                                 frn, frm);
2807             break;
2808         case 1: /* vs: V */
2809             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
2810                                 frn, frm);
2811             break;
2812         case 2: /* ge: N == V -> N ^ V == 0 */
2813             tmp = tcg_temp_new_i32();
2814             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
2815             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
2816                                 frn, frm);
2817             tcg_temp_free_i32(tmp);
2818             break;
2819         case 3: /* gt: !Z && N == V */
2820             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
2821                                 frn, frm);
2822             tmp = tcg_temp_new_i32();
2823             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
2824             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
2825                                 dest, frm);
2826             tcg_temp_free_i32(tmp);
2827             break;
2828         }
2829         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2830         tcg_temp_free_i32(frn);
2831         tcg_temp_free_i32(frm);
2832         tcg_temp_free_i32(dest);
2833
2834         tcg_temp_free_i32(zero);
2835     }
2836
2837     return 0;
2838 }
2839
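     /* VMAXNM/VMINNM have IEEE 754-2008 maxNum()/minNum() semantics: when
      * exactly one operand is a quiet NaN, the other (numeric) operand is
      * returned.  That behaviour comes from the minnum/maxnum helpers.
      */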
2840 static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
2841                             uint32_t rm, uint32_t dp)
2842 {
2843     uint32_t vmin = extract32(insn, 6, 1);
2844     TCGv_ptr fpst = get_fpstatus_ptr(0);
2845
2846     if (dp) {
2847         TCGv_i64 frn, frm, dest;
2848
2849         frn = tcg_temp_new_i64();
2850         frm = tcg_temp_new_i64();
2851         dest = tcg_temp_new_i64();
2852
2853         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2854         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2855         if (vmin) {
2856             gen_helper_vfp_minnumd(dest, frn, frm, fpst);
2857         } else {
2858             gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
2859         }
2860         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2861         tcg_temp_free_i64(frn);
2862         tcg_temp_free_i64(frm);
2863         tcg_temp_free_i64(dest);
2864     } else {
2865         TCGv_i32 frn, frm, dest;
2866
2867         frn = tcg_temp_new_i32();
2868         frm = tcg_temp_new_i32();
2869         dest = tcg_temp_new_i32();
2870
2871         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2872         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2873         if (vmin) {
2874             gen_helper_vfp_minnums(dest, frn, frm, fpst);
2875         } else {
2876             gen_helper_vfp_maxnums(dest, frn, frm, fpst);
2877         }
2878         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2879         tcg_temp_free_i32(frn);
2880         tcg_temp_free_i32(frm);
2881         tcg_temp_free_i32(dest);
2882     }
2883
2884     tcg_temp_free_ptr(fpst);
2885     return 0;
2886 }
2887
2888 static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2889                         int rounding)
2890 {
2891     TCGv_ptr fpst = get_fpstatus_ptr(0);
2892     TCGv_i32 tcg_rmode;
2893
2894     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2895     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
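         /* set_rmode has installed the requested rounding mode in the FP
          * status and left the previous mode in tcg_rmode; the second call
          * below restores it.
          */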
2896
2897     if (dp) {
2898         TCGv_i64 tcg_op;
2899         TCGv_i64 tcg_res;
2900         tcg_op = tcg_temp_new_i64();
2901         tcg_res = tcg_temp_new_i64();
2902         tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2903         gen_helper_rintd(tcg_res, tcg_op, fpst);
2904         tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2905         tcg_temp_free_i64(tcg_op);
2906         tcg_temp_free_i64(tcg_res);
2907     } else {
2908         TCGv_i32 tcg_op;
2909         TCGv_i32 tcg_res;
2910         tcg_op = tcg_temp_new_i32();
2911         tcg_res = tcg_temp_new_i32();
2912         tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2913         gen_helper_rints(tcg_res, tcg_op, fpst);
2914         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2915         tcg_temp_free_i32(tcg_op);
2916         tcg_temp_free_i32(tcg_res);
2917     }
2918
2919     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2920     tcg_temp_free_i32(tcg_rmode);
2921
2922     tcg_temp_free_ptr(fpst);
2923     return 0;
2924 }
2925
2926 static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2927                        int rounding)
2928 {
2929     bool is_signed = extract32(insn, 7, 1);
2930     TCGv_ptr fpst = get_fpstatus_ptr(0);
2931     TCGv_i32 tcg_rmode, tcg_shift;
2932
2933     tcg_shift = tcg_const_i32(0);
2934
2935     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2936     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2937
2938     if (dp) {
2939         TCGv_i64 tcg_double, tcg_res;
2940         TCGv_i32 tcg_tmp;
2941         /* Rd is encoded as a single-precision register even when the
2942          * source is double-precision.
2943          */
2944         rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
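             /* e.g. D == 1, Vd == 3 arrives as rd == 19 and has just been
              * re-encoded as the single-precision index (3 << 1) | 1 == 7.
              */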
2945         tcg_double = tcg_temp_new_i64();
2946         tcg_res = tcg_temp_new_i64();
2947         tcg_tmp = tcg_temp_new_i32();
2948         tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
2949         if (is_signed) {
2950             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
2951         } else {
2952             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
2953         }
2954         tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res);
2955         tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
2956         tcg_temp_free_i32(tcg_tmp);
2957         tcg_temp_free_i64(tcg_res);
2958         tcg_temp_free_i64(tcg_double);
2959     } else {
2960         TCGv_i32 tcg_single, tcg_res;
2961         tcg_single = tcg_temp_new_i32();
2962         tcg_res = tcg_temp_new_i32();
2963         tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
2964         if (is_signed) {
2965             gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
2966         } else {
2967             gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
2968         }
2969         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
2970         tcg_temp_free_i32(tcg_res);
2971         tcg_temp_free_i32(tcg_single);
2972     }
2973
2974     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2975     tcg_temp_free_i32(tcg_rmode);
2976
2977     tcg_temp_free_i32(tcg_shift);
2978
2979     tcg_temp_free_ptr(fpst);
2980
2981     return 0;
2982 }
2983
2984 /* Table for converting the most common AArch32 encoding of
2985  * rounding mode to arm_fprounding order (which matches the
2986  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
2987  */
2988 static const uint8_t fp_decode_rm[] = {
2989     FPROUNDING_TIEAWAY,
2990     FPROUNDING_TIEEVEN,
2991     FPROUNDING_POSINF,
2992     FPROUNDING_NEGINF,
2993 };
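     /* e.g. VRINTA/VCVTA encode RM == 0b00, so fp_decode_rm[0] is
      * FPROUNDING_TIEAWAY; likewise N -> ties-to-even, P -> +Inf and
      * M -> -Inf.
      */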
2994
2995 static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
2996 {
2997     uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
2998
2999     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3000         return 1;
3001     }
3002
3003     if (dp) {
3004         VFP_DREG_D(rd, insn);
3005         VFP_DREG_N(rn, insn);
3006         VFP_DREG_M(rm, insn);
3007     } else {
3008         rd = VFP_SREG_D(insn);
3009         rn = VFP_SREG_N(insn);
3010         rm = VFP_SREG_M(insn);
3011     }
3012
3013     if ((insn & 0x0f800e50) == 0x0e000a00) {
3014         return handle_vsel(insn, rd, rn, rm, dp);
3015     } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
3016         return handle_vminmaxnm(insn, rd, rn, rm, dp);
3017     } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
3018         /* VRINTA, VRINTN, VRINTP, VRINTM */
3019         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3020         return handle_vrint(insn, rd, rm, dp, rounding);
3021     } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
3022         /* VCVTA, VCVTN, VCVTP, VCVTM */
3023         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3024         return handle_vcvt(insn, rd, rm, dp, rounding);
3025     }
3026     return 1;
3027 }
3028
3029 /* Disassemble a VFP instruction.  Returns nonzero if an error occurred
3030    (i.e. an undefined instruction).  */
3031 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3032 {
3033     uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3034     int dp, veclen;
3035     TCGv_i32 addr;
3036     TCGv_i32 tmp;
3037     TCGv_i32 tmp2;
3038
3039     if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3040         return 1;
3041     }
3042
3043     /* FIXME: this access check should not take precedence over UNDEF
3044      * for invalid encodings; we will generate incorrect syndrome information
3045      * for attempts to execute invalid vfp/neon encodings with FP disabled.
3046      */
3047     if (s->fp_excp_el) {
3048         gen_exception_insn(s, 4, EXCP_UDEF,
3049                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
3050         return 0;
3051     }
3052
3053     if (!s->vfp_enabled) {
3054         /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
3055         if ((insn & 0x0fe00fff) != 0x0ee00a10)
3056             return 1;
3057         rn = (insn >> 16) & 0xf;
3058         if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3059             && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3060             return 1;
3061         }
3062     }
3063
3064     if (extract32(insn, 28, 4) == 0xf) {
3065         /* Encodings with T=1 (Thumb) or unconditional (ARM):
3066          * only used in v8 and above.
3067          */
3068         return disas_vfp_v8_insn(s, insn);
3069     }
3070
3071     dp = ((insn & 0xf00) == 0xb00);
3072     switch ((insn >> 24) & 0xf) {
3073     case 0xe:
3074         if (insn & (1 << 4)) {
3075             /* single register transfer */
3076             rd = (insn >> 12) & 0xf;
3077             if (dp) {
3078                 int size;
3079                 int pass;
3080
3081                 VFP_DREG_N(rn, insn);
3082                 if (insn & 0xf)
3083                     return 1;
3084                 if (insn & 0x00c00060
3085                     && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3086                     return 1;
3087                 }
3088
3089                 pass = (insn >> 21) & 1;
3090                 if (insn & (1 << 22)) {
3091                     size = 0;
3092                     offset = ((insn >> 5) & 3) * 8;
3093                 } else if (insn & (1 << 5)) {
3094                     size = 1;
3095                     offset = (insn & (1 << 6)) ? 16 : 0;
3096                 } else {
3097                     size = 2;
3098                     offset = 0;
3099                 }
3100                 if (insn & ARM_CP_RW_BIT) {
3101                     /* vfp->arm */
3102                     tmp = neon_load_reg(rn, pass);
3103                     switch (size) {
3104                     case 0:
3105                         if (offset)
3106                             tcg_gen_shri_i32(tmp, tmp, offset);
3107                         if (insn & (1 << 23))
3108                             gen_uxtb(tmp);
3109                         else
3110                             gen_sxtb(tmp);
3111                         break;
3112                     case 1:
3113                         if (insn & (1 << 23)) {
3114                             if (offset) {
3115                                 tcg_gen_shri_i32(tmp, tmp, 16);
3116                             } else {
3117                                 gen_uxth(tmp);
3118                             }
3119                         } else {
3120                             if (offset) {
3121                                 tcg_gen_sari_i32(tmp, tmp, 16);
3122                             } else {
3123                                 gen_sxth(tmp);
3124                             }
3125                         }
3126                         break;
3127                     case 2:
3128                         break;
3129                     }
3130                     store_reg(s, rd, tmp);
3131                 } else {
3132                     /* arm->vfp */
3133                     tmp = load_reg(s, rd);
3134                     if (insn & (1 << 23)) {
3135                         /* VDUP */
3136                         if (size == 0) {
3137                             gen_neon_dup_u8(tmp, 0);
3138                         } else if (size == 1) {
3139                             gen_neon_dup_low16(tmp);
3140                         }
3141                         for (n = 0; n <= pass * 2; n++) {
3142                             tmp2 = tcg_temp_new_i32();
3143                             tcg_gen_mov_i32(tmp2, tmp);
3144                             neon_store_reg(rn, n, tmp2);
3145                         }
3146                         neon_store_reg(rn, n, tmp);
3147                     } else {
3148                         /* VMOV */
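                             /* insert the low 8 or 16 bits of the ARM core
                                register into the selected element, leaving
                                the other lanes untouched */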
3149                         switch (size) {
3150                         case 0:
3151                             tmp2 = neon_load_reg(rn, pass);
3152                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3153                             tcg_temp_free_i32(tmp2);
3154                             break;
3155                         case 1:
3156                             tmp2 = neon_load_reg(rn, pass);
3157                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3158                             tcg_temp_free_i32(tmp2);
3159                             break;
3160                         case 2:
3161                             break;
3162                         }
3163                         neon_store_reg(rn, pass, tmp);
3164                     }
3165                 }
3166             } else { /* !dp */
3167                 if ((insn & 0x6f) != 0x00)
3168                     return 1;
3169                 rn = VFP_SREG_N(insn);
3170                 if (insn & ARM_CP_RW_BIT) {
3171                     /* vfp->arm */
3172                     if (insn & (1 << 21)) {
3173                         /* system register */
3174                         rn >>= 1;
3175
3176                         switch (rn) {
3177                         case ARM_VFP_FPSID:
3178                             /* VFP2 allows access to FPSID from userspace.
3179                                VFP3 restricts all id registers to privileged
3180                                accesses.  */
3181                             if (IS_USER(s)
3182                                 && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3183                                 return 1;
3184                             }
3185                             tmp = load_cpu_field(vfp.xregs[rn]);
3186                             break;
3187                         case ARM_VFP_FPEXC:
3188                             if (IS_USER(s))
3189                                 return 1;
3190                             tmp = load_cpu_field(vfp.xregs[rn]);
3191                             break;
3192                         case ARM_VFP_FPINST:
3193                         case ARM_VFP_FPINST2:
3194                             /* Not present in VFP3.  */
3195                             if (IS_USER(s)
3196                                 || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3197                                 return 1;
3198                             }
3199                             tmp = load_cpu_field(vfp.xregs[rn]);
3200                             break;
3201                         case ARM_VFP_FPSCR:
3202                             if (rd == 15) {
3203                                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3204                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3205                             } else {
3206                                 tmp = tcg_temp_new_i32();
3207                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
3208                             }
3209                             break;
3210                         case ARM_VFP_MVFR2:
3211                             if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3212                                 return 1;
3213                             }
3214                             /* fall through */
3215                         case ARM_VFP_MVFR0:
3216                         case ARM_VFP_MVFR1:
3217                             if (IS_USER(s)
3218                                 || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3219                                 return 1;
3220                             }
3221                             tmp = load_cpu_field(vfp.xregs[rn]);
3222                             break;
3223                         default:
3224                             return 1;
3225                         }
3226                     } else {
3227                         gen_mov_F0_vreg(0, rn);
3228                         tmp = gen_vfp_mrs();
3229                     }
3230                     if (rd == 15) {
3231                         /* Set the 4 flag bits in the CPSR.  */
3232                         gen_set_nzcv(tmp);
3233                         tcg_temp_free_i32(tmp);
3234                     } else {
3235                         store_reg(s, rd, tmp);
3236                     }
3237                 } else {
3238                     /* arm->vfp */
3239                     if (insn & (1 << 21)) {
3240                         rn >>= 1;
3241                         /* system register */
3242                         switch (rn) {
3243                         case ARM_VFP_FPSID:
3244                         case ARM_VFP_MVFR0:
3245                         case ARM_VFP_MVFR1:
3246                             /* Writes are ignored.  */
3247                             break;
3248                         case ARM_VFP_FPSCR:
3249                             tmp = load_reg(s, rd);
3250                             gen_helper_vfp_set_fpscr(cpu_env, tmp);
3251                             tcg_temp_free_i32(tmp);
3252                             gen_lookup_tb(s);
3253                             break;
3254                         case ARM_VFP_FPEXC:
3255                             if (IS_USER(s))
3256                                 return 1;
3257                             /* TODO: VFP subarchitecture support.
3258                              * For now, keep only the EN bit. */
3259                             tmp = load_reg(s, rd);
3260                             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3261                             store_cpu_field(tmp, vfp.xregs[rn]);
3262                             gen_lookup_tb(s);
3263                             break;
3264                         case ARM_VFP_FPINST:
3265                         case ARM_VFP_FPINST2:
3266                             if (IS_USER(s)) {
3267                                 return 1;
3268                             }
3269                             tmp = load_reg(s, rd);
3270                             store_cpu_field(tmp, vfp.xregs[rn]);
3271                             break;
3272                         default:
3273                             return 1;
3274                         }
3275                     } else {
3276                         tmp = load_reg(s, rd);
3277                         gen_vfp_msr(tmp);
3278                         gen_mov_vreg_F0(0, rn);
3279                     }
3280                 }
3281             }
3282         } else {
3283             /* data processing */
3284             /* The opcode is in bits 23, 21, 20 and 6.  */
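             /* Bit 23 supplies op bit 3, bits 21:20 supply op bits 2:1 and
              * bit 6 supplies op bit 0, giving op values 0..15.
              */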
3285             op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3286             if (dp) {
3287                 if (op == 15) {
3288                     /* rn is opcode */
3289                     rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3290                 } else {
3291                     /* rn is register number */
3292                     VFP_DREG_N(rn, insn);
3293                 }
3294
3295                 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3296                                  ((rn & 0x1e) == 0x6))) {
3297                     /* Integer or single/half precision destination.  */
3298                     rd = VFP_SREG_D(insn);
3299                 } else {
3300                     VFP_DREG_D(rd, insn);
3301                 }
3302                 if (op == 15 &&
3303                     (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3304                      ((rn & 0x1e) == 0x4))) {
3305                     /* VCVT from int or half precision is always from S reg
3306                      * regardless of dp bit. VCVT with immediate frac_bits
3307                      * has same format as SREG_M.
3308                      */
3309                     rm = VFP_SREG_M(insn);
3310                 } else {
3311                     VFP_DREG_M(rm, insn);
3312                 }
3313             } else {
3314                 rn = VFP_SREG_N(insn);
3315                 if (op == 15 && rn == 15) {
3316                     /* Double precision destination.  */
3317                     VFP_DREG_D(rd, insn);
3318                 } else {
3319                     rd = VFP_SREG_D(insn);
3320                 }
3321                 /* NB that we implicitly rely on the encoding for the frac_bits
3322                  * in VCVT of fixed to float being the same as that of an SREG_M
3323                  */
3324                 rm = VFP_SREG_M(insn);
3325             }
3326
3327             veclen = s->vec_len;
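             /* Short vectors apply to the arithmetic ops and to cpy/abs/neg/
              * sqrt (rn 0..3); the remaining op == 15 encodings are always
              * scalar.
              */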
3328             if (op == 15 && rn > 3)
3329                 veclen = 0;
3330
3331             /* Shut up compiler warnings.  */
3332             delta_m = 0;
3333             delta_d = 0;
3334             bank_mask = 0;
3335
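             /* The 32 single registers are grouped in banks of eight (the
              * doubles in banks of four).  Operands in the first bank are
              * scalars; vector operands step through their bank using the
              * FPSCR stride.
              */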
3336             if (veclen > 0) {
3337                 if (dp)
3338                     bank_mask = 0xc;
3339                 else
3340                     bank_mask = 0x18;
3341
3342                 /* Figure out what type of vector operation this is.  */
3343                 if ((rd & bank_mask) == 0) {
3344                     /* scalar */
3345                     veclen = 0;
3346                 } else {
3347                     if (dp)
3348                         delta_d = (s->vec_stride >> 1) + 1;
3349                     else
3350                         delta_d = s->vec_stride + 1;
3351
3352                     if ((rm & bank_mask) == 0) {
3353                         /* mixed scalar/vector */
3354                         delta_m = 0;
3355                     } else {
3356                         /* vector */
3357                         delta_m = delta_d;
3358                     }
3359                 }
3360             }
3361
3362             /* Load the initial operands.  */
3363             if (op == 15) {
3364                 switch (rn) {
3365                 case 16:
3366                 case 17:
3367                     /* Integer source */
3368                     gen_mov_F0_vreg(0, rm);
3369                     break;
3370                 case 8:
3371                 case 9:
3372                     /* Compare */
3373                     gen_mov_F0_vreg(dp, rd);
3374                     gen_mov_F1_vreg(dp, rm);
3375                     break;
3376                 case 10:
3377                 case 11:
3378                     /* Compare with zero */
3379                     gen_mov_F0_vreg(dp, rd);
3380                     gen_vfp_F1_ld0(dp);
3381                     break;
3382                 case 20:
3383                 case 21:
3384                 case 22:
3385                 case 23:
3386                 case 28:
3387                 case 29:
3388                 case 30:
3389                 case 31:
3390                     /* Source and destination the same.  */
3391                     gen_mov_F0_vreg(dp, rd);
3392                     break;
3393                 case 4:
3394                 case 5:
3395                 case 6:
3396                 case 7:
3397                     /* VCVTB, VCVTT: only present with the halfprec extension
3398                      * UNPREDICTABLE if bit 8 is set prior to ARMv8
3399                      * (we choose to UNDEF)
3400                      */
3401                     if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3402                         !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3403                         return 1;
3404                     }
3405                     if (!extract32(rn, 1, 1)) {
3406                         /* Half precision source.  */
3407                         gen_mov_F0_vreg(0, rm);
3408                         break;
3409                     }
3410                     /* Otherwise fall through */
3411                 default:
3412                     /* One source operand.  */
3413                     gen_mov_F0_vreg(dp, rm);
3414                     break;
3415                 }
3416             } else {
3417                 /* Two source operands.  */
3418                 gen_mov_F0_vreg(dp, rn);
3419                 gen_mov_F1_vreg(dp, rm);
3420             }
3421
3422             for (;;) {
3423                 /* Perform the calculation.  */
3424                 switch (op) {
3425                 case 0: /* VMLA: fd + (fn * fm) */
3426                     /* Note that order of inputs to the add matters for NaNs */
3427                     gen_vfp_F1_mul(dp);
3428                     gen_mov_F0_vreg(dp, rd);
3429                     gen_vfp_add(dp);
3430                     break;
3431                 case 1: /* VMLS: fd + -(fn * fm) */
3432                     gen_vfp_mul(dp);
3433                     gen_vfp_F1_neg(dp);
3434                     gen_mov_F0_vreg(dp, rd);
3435                     gen_vfp_add(dp);
3436                     break;
3437                 case 2: /* VNMLS: -fd + (fn * fm) */
3438                     /* Note that it isn't valid to replace (-A + B) with (B - A)
3439                      * or similar plausible looking simplifications
3440                      * because this will give wrong results for NaNs.
3441                      */
3442                     gen_vfp_F1_mul(dp);
3443                     gen_mov_F0_vreg(dp, rd);
3444                     gen_vfp_neg(dp);
3445                     gen_vfp_add(dp);
3446                     break;
3447                 case 3: /* VNMLA: -fd + -(fn * fm) */
3448                     gen_vfp_mul(dp);
3449                     gen_vfp_F1_neg(dp);
3450                     gen_mov_F0_vreg(dp, rd);
3451                     gen_vfp_neg(dp);
3452                     gen_vfp_add(dp);
3453                     break;
3454                 case 4: /* mul: fn * fm */
3455                     gen_vfp_mul(dp);
3456                     break;
3457                 case 5: /* nmul: -(fn * fm) */
3458                     gen_vfp_mul(dp);
3459                     gen_vfp_neg(dp);
3460                     break;
3461                 case 6: /* add: fn + fm */
3462                     gen_vfp_add(dp);
3463                     break;
3464                 case 7: /* sub: fn - fm */
3465                     gen_vfp_sub(dp);
3466                     break;
3467                 case 8: /* div: fn / fm */
3468                     gen_vfp_div(dp);
3469                     break;
3470                 case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
3471                 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3472                 case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
3473                 case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
3474                     /* These are fused multiply-add, and must be done as one
3475                      * floating point operation with no rounding between the
3476                      * multiplication and addition steps.
3477                      * NB that doing the negations here as separate steps is
3478                      * correct: an input NaN should come out with its sign bit
3479                      * flipped if it is a negated input.
3480                      */
3481                     if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3482                         return 1;
3483                     }
3484                     if (dp) {
3485                         TCGv_ptr fpst;
3486                         TCGv_i64 frd;
3487                         if (op & 1) {
3488                             /* VFNMS, VFMS */
3489                             gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3490                         }
3491                         frd = tcg_temp_new_i64();
3492                         tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3493                         if (op & 2) {
3494                             /* VFNMA, VFNMS */
3495                             gen_helper_vfp_negd(frd, frd);
3496                         }
3497                         fpst = get_fpstatus_ptr(0);
3498                         gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3499                                                cpu_F1d, frd, fpst);
3500                         tcg_temp_free_ptr(fpst);
3501                         tcg_temp_free_i64(frd);
3502                     } else {
3503                         TCGv_ptr fpst;
3504                         TCGv_i32 frd;
3505                         if (op & 1) {
3506                             /* VFNMS, VFMS */
3507                             gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3508                         }
3509                         frd = tcg_temp_new_i32();
3510                         tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3511                         if (op & 2) {
3512                             gen_helper_vfp_negs(frd, frd);
3513                         }
3514                         fpst = get_fpstatus_ptr(0);
3515                         gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3516                                                cpu_F1s, frd, fpst);
3517                         tcg_temp_free_ptr(fpst);
3518                         tcg_temp_free_i32(frd);
3519                     }
3520                     break;
3521                 case 14: /* fconst */
3522                     if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3523                         return 1;
3524                     }
3525
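                     /* VFP3 VMOV immediate: imm8 abcdefgh expands to sign a,
                      * NOT(b) then b replicated, then cdefgh padded with
                      * zeros; e.g. imm8 0x70 yields 0x3f800000, i.e. 1.0f.
                      */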
3526                     n = (insn << 12) & 0x80000000;
3527                     i = ((insn >> 12) & 0x70) | (insn & 0xf);
3528                     if (dp) {
3529                         if (i & 0x40)
3530                             i |= 0x3f80;
3531                         else
3532                             i |= 0x4000;
3533                         n |= i << 16;
3534                         tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3535                     } else {
3536                         if (i & 0x40)
3537                             i |= 0x780;
3538                         else
3539                             i |= 0x800;
3540                         n |= i << 19;
3541                         tcg_gen_movi_i32(cpu_F0s, n);
3542                     }
3543                     break;
3544                 case 15: /* extension space */
3545                     switch (rn) {
3546                     case 0: /* cpy */
3547                         /* no-op */
3548                         break;
3549                     case 1: /* abs */
3550                         gen_vfp_abs(dp);
3551                         break;
3552                     case 2: /* neg */
3553                         gen_vfp_neg(dp);
3554                         break;
3555                     case 3: /* sqrt */
3556                         gen_vfp_sqrt(dp);
3557                         break;
3558                     case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3559                         tmp = gen_vfp_mrs();
3560                         tcg_gen_ext16u_i32(tmp, tmp);
3561                         if (dp) {
3562                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3563                                                            cpu_env);
3564                         } else {
3565                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3566                                                            cpu_env);
3567                         }
3568                         tcg_temp_free_i32(tmp);
3569                         break;
3570                     case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3571                         tmp = gen_vfp_mrs();
3572                         tcg_gen_shri_i32(tmp, tmp, 16);
3573                         if (dp) {
3574                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3575                                                            cpu_env);
3576                         } else {
3577                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3578                                                            cpu_env);
3579                         }
3580                         tcg_temp_free_i32(tmp);
3581                         break;
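                         /* Cases 6 and 7 narrow to half precision and deposit
                          * the result in the low (VCVTB) or high (VCVTT) half
                          * of the destination, preserving the other half.
                          */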
3582                     case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3583                         tmp = tcg_temp_new_i32();
3584                         if (dp) {
3585                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3586                                                            cpu_env);
3587                         } else {
3588                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3589                                                            cpu_env);
3590                         }
3591                         gen_mov_F0_vreg(0, rd);
3592                         tmp2 = gen_vfp_mrs();
3593                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3594                         tcg_gen_or_i32(tmp, tmp, tmp2);
3595                         tcg_temp_free_i32(tmp2);
3596                         gen_vfp_msr(tmp);
3597                         break;
3598                     case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3599                         tmp = tcg_temp_new_i32();
3600                         if (dp) {
3601                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3602                                                            cpu_env);
3603                         } else {
3604                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3605                                                            cpu_env);
3606                         }
3607                         tcg_gen_shli_i32(tmp, tmp, 16);
3608                         gen_mov_F0_vreg(0, rd);
3609                         tmp2 = gen_vfp_mrs();
3610                         tcg_gen_ext16u_i32(tmp2, tmp2);
3611                         tcg_gen_or_i32(tmp, tmp, tmp2);
3612                         tcg_temp_free_i32(tmp2);
3613                         gen_vfp_msr(tmp);
3614                         break;
3615                     case 8: /* cmp */
3616                         gen_vfp_cmp(dp);
3617                         break;
3618                     case 9: /* cmpe */
3619                         gen_vfp_cmpe(dp);
3620                         break;
3621                     case 10: /* cmpz */
3622                         gen_vfp_cmp(dp);
3623                         break;
3624                     case 11: /* cmpez */
3625                         gen_vfp_F1_ld0(dp);
3626                         gen_vfp_cmpe(dp);
3627                         break;
3628                     case 12: /* vrintr */
3629                     {
3630                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3631                         if (dp) {
3632                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3633                         } else {
3634                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3635                         }
3636                         tcg_temp_free_ptr(fpst);
3637                         break;
3638                     }
3639                     case 13: /* vrintz */
3640                     {
3641                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3642                         TCGv_i32 tcg_rmode;
3643                         tcg_rmode = tcg_const_i32(float_round_to_zero);
3644                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3645                         if (dp) {
3646                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3647                         } else {
3648                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3649                         }
3650                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3651                         tcg_temp_free_i32(tcg_rmode);
3652                         tcg_temp_free_ptr(fpst);
3653                         break;
3654                     }
3655                     case 14: /* vrintx */
3656                     {
3657                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3658                         if (dp) {
3659                             gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
3660                         } else {
3661                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
3662                         }
3663                         tcg_temp_free_ptr(fpst);
3664                         break;
3665                     }
3666                     case 15: /* single<->double conversion */
3667                         if (dp)
3668                             gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3669                         else
3670                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3671                         break;
3672                     case 16: /* fuito */
3673                         gen_vfp_uito(dp, 0);
3674                         break;
3675                     case 17: /* fsito */
3676                         gen_vfp_sito(dp, 0);
3677                         break;
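                         /* For the fixed-point conversions below, the
                          * immediate decoded into rm encodes size minus
                          * frac_bits, hence the 16 - rm and 32 - rm shifts.
                          */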
3678                     case 20: /* fshto */
3679                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3680                             return 1;
3681                         }
3682                         gen_vfp_shto(dp, 16 - rm, 0);
3683                         break;
3684                     case 21: /* fslto */
3685                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3686                             return 1;
3687                         }
3688                         gen_vfp_slto(dp, 32 - rm, 0);
3689                         break;
3690                     case 22: /* fuhto */
3691                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3692                             return 1;
3693                         }
3694                         gen_vfp_uhto(dp, 16 - rm, 0);
3695                         break;
3696                     case 23: /* fulto */
3697                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3698                             return 1;
3699                         }
3700                         gen_vfp_ulto(dp, 32 - rm, 0);
3701                         break;
3702                     case 24: /* ftoui */
3703                         gen_vfp_toui(dp, 0);
3704                         break;
3705                     case 25: /* ftouiz */
3706                         gen_vfp_touiz(dp, 0);
3707                         break;
3708                     case 26: /* ftosi */
3709                         gen_vfp_tosi(dp, 0);
3710                         break;
3711                     case 27: /* ftosiz */
3712                         gen_vfp_tosiz(dp, 0);
3713                         break;
3714                     case 28: /* ftosh */
3715                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3716                             return 1;
3717                         }
3718                         gen_vfp_tosh(dp, 16 - rm, 0);
3719                         break;
3720                     case 29: /* ftosl */
3721                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3722                             return 1;
3723                         }
3724                         gen_vfp_tosl(dp, 32 - rm, 0);
3725                         break;
3726                     case 30: /* ftouh */
3727                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3728                             return 1;
3729                         }
3730                         gen_vfp_touh(dp, 16 - rm, 0);
3731                         break;
3732                     case 31: /* ftoul */
3733                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3734                             return 1;
3735                         }
3736                         gen_vfp_toul(dp, 32 - rm, 0);
3737                         break;
3738                     default: /* undefined */
3739                         return 1;
3740                     }
3741                     break;
3742                 default: /* undefined */
3743                     return 1;
3744                 }
3745
3746                 /* Write back the result.  */
3747                 if (op == 15 && (rn >= 8 && rn <= 11)) {
3748                     /* Comparison, do nothing.  */
3749                 } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
3750                                               (rn & 0x1e) == 0x6)) {
3751                     /* VCVT double to int: always integer result.
3752                      * VCVT double to half precision is always a single
3753                      * precision result.
3754                      */
3755                     gen_mov_vreg_F0(0, rd);
3756                 } else if (op == 15 && rn == 15) {
3757                     /* conversion */
3758                     gen_mov_vreg_F0(!dp, rd);
3759                 } else {
3760                     gen_mov_vreg_F0(dp, rd);
3761                 }
3762
3763                 /* Break out of the loop if we have finished.  */
3764                 if (veclen == 0)
3765                     break;
3766
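                 /* A single-operand op with a scalar source computes one
                  * result and fans it out to every destination in the
                  * vector.
                  */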
3767                 if (op == 15 && delta_m == 0) {
3768                     /* single source one-many */
3769                     while (veclen--) {
3770                         rd = ((rd + delta_d) & (bank_mask - 1))
3771                              | (rd & bank_mask);
3772                         gen_mov_vreg_F0(dp, rd);
3773                     }
3774                     break;
3775                 }
3776                 /* Set up the next operands.  */
3777                 veclen--;
3778                 rd = ((rd + delta_d) & (bank_mask - 1))
3779                      | (rd & bank_mask);
3780
3781                 if (op == 15) {
3782                     /* One source operand.  */
3783                     rm = ((rm + delta_m) & (bank_mask - 1))
3784                          | (rm & bank_mask);
3785                     gen_mov_F0_vreg(dp, rm);
3786                 } else {
3787                     /* Two source operands.  */
3788                     rn = ((rn + delta_d) & (bank_mask - 1))
3789                          | (rn & bank_mask);
3790                     gen_mov_F0_vreg(dp, rn);
3791                     if (delta_m) {
3792                         rm = ((rm + delta_m) & (bank_mask - 1))
3793                              | (rm & bank_mask);
3794                         gen_mov_F1_vreg(dp, rm);
3795                     }
3796                 }
3797             }
3798         }
3799         break;
3800     case 0xc:
3801     case 0xd:
3802         if ((insn & 0x03e00000) == 0x00400000) {
3803             /* two-register transfer */
3804             rn = (insn >> 16) & 0xf;
3805             rd = (insn >> 12) & 0xf;
3806             if (dp) {
3807                 VFP_DREG_M(rm, insn);
3808             } else {
3809                 rm = VFP_SREG_M(insn);
3810             }
3811
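             /* e.g. VMOV rd, rn, dm for a double, or VMOV rd, rn, sm, s(m+1)
              * for a pair of singles.
              */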
3812             if (insn & ARM_CP_RW_BIT) {
3813                 /* vfp->arm */
3814                 if (dp) {
3815                     gen_mov_F0_vreg(0, rm * 2);
3816                     tmp = gen_vfp_mrs();
3817                     store_reg(s, rd, tmp);
3818                     gen_mov_F0_vreg(0, rm * 2 + 1);
3819                     tmp = gen_vfp_mrs();
3820                     store_reg(s, rn, tmp);
3821                 } else {
3822                     gen_mov_F0_vreg(0, rm);
3823                     tmp = gen_vfp_mrs();
3824                     store_reg(s, rd, tmp);
3825                     gen_mov_F0_vreg(0, rm + 1);
3826                     tmp = gen_vfp_mrs();
3827                     store_reg(s, rn, tmp);
3828                 }
3829             } else {
3830                 /* arm->vfp */
3831                 if (dp) {
3832                     tmp = load_reg(s, rd);
3833                     gen_vfp_msr(tmp);
3834                     gen_mov_vreg_F0(0, rm * 2);
3835                     tmp = load_reg(s, rn);
3836                     gen_vfp_msr(tmp);
3837                     gen_mov_vreg_F0(0, rm * 2 + 1);
3838                 } else {
3839                     tmp = load_reg(s, rd);
3840                     gen_vfp_msr(tmp);
3841                     gen_mov_vreg_F0(0, rm);
3842                     tmp = load_reg(s, rn);
3843                     gen_vfp_msr(tmp);
3844                     gen_mov_vreg_F0(0, rm + 1);
3845                 }
3846             }
3847         } else {
3848             /* Load/store */
3849             rn = (insn >> 16) & 0xf;
3850             if (dp)
3851                 VFP_DREG_D(rd, insn);
3852             else
3853                 rd = VFP_SREG_D(insn);
3854             if ((insn & 0x01200000) == 0x01000000) {
3855                 /* Single load/store */
3856                 offset = (insn & 0xff) << 2;
3857                 if ((insn & (1 << 23)) == 0)
3858                     offset = -offset;
3859                 if (s->thumb && rn == 15) {
3860                     /* This is actually UNPREDICTABLE */
3861                     addr = tcg_temp_new_i32();
3862                     tcg_gen_movi_i32(addr, s->pc & ~2);
3863                 } else {
3864                     addr = load_reg(s, rn);
3865                 }
3866                 tcg_gen_addi_i32(addr, addr, offset);
3867                 if (insn & (1 << 20)) {
3868                     gen_vfp_ld(s, dp, addr);
3869                     gen_mov_vreg_F0(dp, rd);
3870                 } else {
3871                     gen_mov_F0_vreg(dp, rd);
3872                     gen_vfp_st(s, dp, addr);
3873                 }
3874                 tcg_temp_free_i32(addr);
3875             } else {
3876                 /* load/store multiple */
3877                 int w = insn & (1 << 21);
3878                 if (dp)
3879                     n = (insn >> 1) & 0x7f;
3880                 else
3881                     n = insn & 0xff;
3882
3883                 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
3884                     /* P == U, W == 1 => UNDEF */
3885                     return 1;
3886                 }
3887                 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
3888                     /* UNPREDICTABLE cases for bad immediates: we choose to
3889                      * UNDEF to avoid generating huge numbers of TCG ops
3890                      */
3891                     return 1;
3892                 }
3893                 if (rn == 15 && w) {
3894                     /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
3895                     return 1;
3896                 }
3897
3898                 if (s->thumb && rn == 15) {
3899                     /* This is actually UNPREDICTABLE */
3900                     addr = tcg_temp_new_i32();
3901                     tcg_gen_movi_i32(addr, s->pc & ~2);
3902                 } else {
3903                     addr = load_reg(s, rn);
3904                 }
3905                 if (insn & (1 << 24)) /* pre-decrement */
3906                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3907
3908                 if (dp)
3909                     offset = 8;
3910                 else
3911                     offset = 4;
3912                 for (i = 0; i < n; i++) {
3913                     if (insn & ARM_CP_RW_BIT) {
3914                         /* load */
3915                         gen_vfp_ld(s, dp, addr);
3916                         gen_mov_vreg_F0(dp, rd + i);
3917                     } else {
3918                         /* store */
3919                         gen_mov_F0_vreg(dp, rd + i);
3920                         gen_vfp_st(s, dp, addr);
3921                     }
3922                     tcg_gen_addi_i32(addr, addr, offset);
3923                 }
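                 /* Writeback leaves the base lowered by the immediate for
                  * the decrement forms.  An odd dp immediate marks the
                  * deprecated FLDMX/FSTMX encoding, which reserves one
                  * extra word.
                  */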
3924                 if (w) {
3925                     /* writeback */
3926                     if (insn & (1 << 24))
3927                         offset = -offset * n;
3928                     else if (dp && (insn & 1))
3929                         offset = 4;
3930                     else
3931                         offset = 0;
3932
3933                     if (offset != 0)
3934                         tcg_gen_addi_i32(addr, addr, offset);
3935                     store_reg(s, rn, addr);
3936                 } else {
3937                     tcg_temp_free_i32(addr);
3938                 }
3939             }
3940         }
3941         break;
3942     default:
3943         /* Should never happen.  */
3944         return 1;
3945     }
3946     return 0;
3947 }
3948
3949 static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
3950 {
3951     TranslationBlock *tb;
3952
3953     tb = s->tb;
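     /* Direct block chaining is only safe while the destination stays on
      * the same guest page as this TB; the low bits of the exit value tell
      * the main loop which jump slot to patch.  Otherwise just update the
      * PC and return to the main loop.
      */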
3954     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3955         tcg_gen_goto_tb(n);
3956         gen_set_pc_im(s, dest);
3957         tcg_gen_exit_tb((uintptr_t)tb + n);
3958     } else {
3959         gen_set_pc_im(s, dest);
3960         tcg_gen_exit_tb(0);
3961     }
3962 }
3963
3964 static inline void gen_jmp(DisasContext *s, uint32_t dest)
3965 {
3966     if (unlikely(s->singlestep_enabled || s->ss_active)) {
3967         /* An indirect jump so that we still trigger the debug exception.  */
3968         if (s->thumb)
3969             dest |= 1;
3970         gen_bx_im(s, dest);
3971     } else {
3972         gen_goto_tb(s, 0, dest);
3973         s->is_jmp = DISAS_TB_JUMP;
3974     }
3975 }
3976
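     /* 16x16->32 signed multiply; x and y select the top or bottom half of
        each operand, as in the SMUL<x><y> family.  */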
3977 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
3978 {
3979     if (x)
3980         tcg_gen_sari_i32(t0, t0, 16);
3981     else
3982         gen_sxth(t0);
3983     if (y)
3984         tcg_gen_sari_i32(t1, t1, 16);
3985     else
3986         gen_sxth(t1);
3987     tcg_gen_mul_i32(t0, t0, t1);
3988 }
3989
3990 /* Return the mask of PSR bits set by a MSR instruction.  */
3991 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
3992 {
3993     uint32_t mask;
3994
3995     mask = 0;
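     /* flags is the MSR field mask: bit 0 selects c (PSR[7:0]), bit 1 x
      * (PSR[15:8]), bit 2 s (PSR[23:16]) and bit 3 f (PSR[31:24]).
      */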
3996     if (flags & (1 << 0))
3997         mask |= 0xff;
3998     if (flags & (1 << 1))
3999         mask |= 0xff00;
4000     if (flags & (1 << 2))
4001         mask |= 0xff0000;
4002     if (flags & (1 << 3))
4003         mask |= 0xff000000;
4004
4005     /* Mask out undefined bits.  */
4006     mask &= ~CPSR_RESERVED;
4007     if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
4008         mask &= ~CPSR_T;
4009     }
4010     if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
4011         mask &= ~CPSR_Q; /* V5TE in reality */
4012     }
4013     if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
4014         mask &= ~(CPSR_E | CPSR_GE);
4015     }
4016     if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
4017         mask &= ~CPSR_IT;
4018     }
4019     /* Mask out execution state and reserved bits.  */
4020     if (!spsr) {
4021         mask &= ~(CPSR_EXEC | CPSR_RESERVED);
4022     }
4023     /* Mask out privileged bits.  */
4024     if (IS_USER(s))
4025         mask &= CPSR_USER;
4026     return mask;
4027 }
4028
4029 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4030 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4031 {
4032     TCGv_i32 tmp;
4033     if (spsr) {
4034         /* ??? This is also undefined in system mode.  */
4035         if (IS_USER(s))
4036             return 1;
4037
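         /* Keep the SPSR bits outside mask and insert the new masked bits. */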
4038         tmp = load_cpu_field(spsr);
4039         tcg_gen_andi_i32(tmp, tmp, ~mask);
4040         tcg_gen_andi_i32(t0, t0, mask);
4041         tcg_gen_or_i32(tmp, tmp, t0);
4042         store_cpu_field(tmp, spsr);
4043     } else {
4044         gen_set_cpsr(t0, mask);
4045     }
4046     tcg_temp_free_i32(t0);
4047     gen_lookup_tb(s);
4048     return 0;
4049 }
4050
4051 /* Returns nonzero if access to the PSR is not permitted.  */
4052 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4053 {
4054     TCGv_i32 tmp;
4055     tmp = tcg_temp_new_i32();
4056     tcg_gen_movi_i32(tmp, val);
4057     return gen_set_psr(s, mask, spsr, tmp);
4058 }
4059
4060 /* Generate an old-style exception return. Marks pc as dead. */
4061 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
4062 {
4063     TCGv_i32 tmp;
4064     store_reg(s, 15, pc);
4065     tmp = load_cpu_field(spsr);
4066     gen_set_cpsr(tmp, CPSR_ERET_MASK);
4067     tcg_temp_free_i32(tmp);
4068     s->is_jmp = DISAS_UPDATE;
4069 }
4070
4071 /* Generate a v6 exception return.  Marks both values as dead.  */
4072 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
4073 {
4074     gen_set_cpsr(cpsr, CPSR_ERET_MASK);
4075     tcg_temp_free_i32(cpsr);
4076     store_reg(s, 15, pc);
4077     s->is_jmp = DISAS_UPDATE;
4078 }
4079
4080 static void gen_nop_hint(DisasContext *s, int val)
4081 {
4082     switch (val) {
4083     case 1: /* yield */
4084         gen_set_pc_im(s, s->pc);
4085         s->is_jmp = DISAS_YIELD;
4086         break;
4087     case 3: /* wfi */
4088         gen_set_pc_im(s, s->pc);
4089         s->is_jmp = DISAS_WFI;
4090         break;
4091     case 2: /* wfe */
4092         gen_set_pc_im(s, s->pc);
4093         s->is_jmp = DISAS_WFE;
4094         break;
4095     case 4: /* sev */
4096     case 5: /* sevl */
4097         /* TODO: Implement SEV, SEVL and WFE.  May help SMP performance.  */
4098     default: /* nop */
4099         break;
4100     }
4101 }
4102
4103 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
4104
4105 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4106 {
4107     switch (size) {
4108     case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4109     case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4110     case 2: tcg_gen_add_i32(t0, t0, t1); break;
4111     default: abort();
4112     }
4113 }
4114
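 /* Reverse subtract: t0 = t1 - t0.  An unsupported element size is
    silently ignored here rather than aborting.  */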
4115 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4116 {
4117     switch (size) {
4118     case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4119     case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4120     case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4121     default: return;
4122     }
4123 }
4124
4125 /* 32-bit pairwise ops end up the same as the elementwise versions.  */
4126 #define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
4127 #define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
4128 #define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
4129 #define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
4130
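 /* Expand a Neon integer op for each element size; the switch key is
    (size << 1) | u, i.e. s8, u8, s16, u16, s32, u32 in order.  */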
4131 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
4132     switch ((size << 1) | u) { \
4133     case 0: \
4134         gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
4135         break; \
4136     case 1: \
4137         gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
4138         break; \
4139     case 2: \
4140         gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
4141         break; \
4142     case 3: \
4143         gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
4144         break; \
4145     case 4: \
4146         gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
4147         break; \
4148     case 5: \
4149         gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
4150         break; \
4151     default: return 1; \
4152     }} while (0)
4153
4154 #define GEN_NEON_INTEGER_OP(name) do { \
4155     switch ((size << 1) | u) { \
4156     case 0: \
4157         gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
4158         break; \
4159     case 1: \
4160         gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
4161         break; \
4162     case 2: \
4163         gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
4164         break; \
4165     case 3: \
4166         gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
4167         break; \
4168     case 4: \
4169         gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
4170         break; \
4171     case 5: \
4172         gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
4173         break; \
4174     default: return 1; \
4175     }} while (0)
4176
4177 static TCGv_i32 neon_load_scratch(int scratch)
4178 {
4179     TCGv_i32 tmp = tcg_temp_new_i32();
4180     tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4181     return tmp;
4182 }
4183
4184 static void neon_store_scratch(int scratch, TCGv_i32 var)
4185 {
4186     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4187     tcg_temp_free_i32(var);
4188 }
4189
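 /* Load a Neon scalar and duplicate it into every lane of a 32-bit temp;
    reg packs the D register number together with the element index.  */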
4190 static inline TCGv_i32 neon_get_scalar(int size, int reg)
4191 {
4192     TCGv_i32 tmp;
4193     if (size == 1) {
4194         tmp = neon_load_reg(reg & 7, reg >> 4);
4195         if (reg & 8) {
4196             gen_neon_dup_high16(tmp);
4197         } else {
4198             gen_neon_dup_low16(tmp);
4199         }
4200     } else {
4201         tmp = neon_load_reg(reg & 15, reg >> 4);
4202     }
4203     return tmp;
4204 }
4205
4206 static int gen_neon_unzip(int rd, int rm, int size, int q)
4207 {
4208     TCGv_i32 tmp, tmp2;
4209     if (!q && size == 2) {
4210         return 1;
4211     }
4212     tmp = tcg_const_i32(rd);
4213     tmp2 = tcg_const_i32(rm);
4214     if (q) {
4215         switch (size) {
4216         case 0:
4217             gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
4218             break;
4219         case 1:
4220             gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
4221             break;
4222         case 2:
4223             gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
4224             break;
4225         default:
4226             abort();
4227         }
4228     } else {
4229         switch (size) {
4230         case 0:
4231             gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
4232             break;
4233         case 1:
4234             gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
4235             break;
4236         default:
4237             abort();
4238         }
4239     }
4240     tcg_temp_free_i32(tmp);
4241     tcg_temp_free_i32(tmp2);
4242     return 0;
4243 }
4244
4245 static int gen_neon_zip(int rd, int rm, int size, int q)
4246 {
4247     TCGv_i32 tmp, tmp2;
4248     if (!q && size == 2) {
4249         return 1;
4250     }
4251     tmp = tcg_const_i32(rd);
4252     tmp2 = tcg_const_i32(rm);
4253     if (q) {
4254         switch (size) {
4255         case 0:
4256             gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
4257             break;
4258         case 1:
4259             gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
4260             break;
4261         case 2:
4262             gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
4263             break;
4264         default:
4265             abort();
4266         }
4267     } else {
4268         switch (size) {
4269         case 0:
4270             gen_helper_neon_zip8(cpu_env, tmp, tmp2);
4271             break;
4272         case 1:
4273             gen_helper_neon_zip16(cpu_env, tmp, tmp2);
4274             break;
4275         default:
4276             abort();
4277         }
4278     }
4279     tcg_temp_free_i32(tmp);
4280     tcg_temp_free_i32(tmp2);
4281     return 0;
4282 }
4283
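 /* 2x2 transpose of adjacent byte pairs between t0 and t1, as used to
    implement VTRN.8 on one 32-bit chunk.  */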
4284 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4285 {
4286     TCGv_i32 rd, tmp;
4287
4288     rd = tcg_temp_new_i32();
4289     tmp = tcg_temp_new_i32();
4290
4291     tcg_gen_shli_i32(rd, t0, 8);
4292     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4293     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4294     tcg_gen_or_i32(rd, rd, tmp);
4295
4296     tcg_gen_shri_i32(t1, t1, 8);
4297     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4298     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4299     tcg_gen_or_i32(t1, t1, tmp);
4300     tcg_gen_mov_i32(t0, rd);
4301
4302     tcg_temp_free_i32(tmp);
4303     tcg_temp_free_i32(rd);
4304 }
4305
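 /* Halfword variant of the above, for VTRN.16.  */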
4306 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4307 {
4308     TCGv_i32 rd, tmp;
4309
4310     rd = tcg_temp_new_i32();
4311     tmp = tcg_temp_new_i32();
4312
4313     tcg_gen_shli_i32(rd, t0, 16);
4314     tcg_gen_andi_i32(tmp, t1, 0xffff);
4315     tcg_gen_or_i32(rd, rd, tmp);
4316     tcg_gen_shri_i32(t1, t1, 16);
4317     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4318     tcg_gen_or_i32(t1, t1, tmp);
4319     tcg_gen_mov_i32(t0, rd);
4320
4321     tcg_temp_free_i32(tmp);
4322     tcg_temp_free_i32(rd);
4323 }
4324
4325
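 /* Per-op parameters for the Neon "load/store all elements" forms, indexed
    by the op field: registers transferred, interleave pattern and
    D-register spacing.  */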
4326 static struct {
4327     int nregs;
4328     int interleave;
4329     int spacing;
4330 } neon_ls_element_type[11] = {
4331     {4, 4, 1},
4332     {4, 4, 2},
4333     {4, 1, 1},
4334     {4, 2, 1},
4335     {3, 3, 1},
4336     {3, 3, 2},
4337     {3, 1, 1},
4338     {1, 1, 1},
4339     {2, 2, 1},
4340     {2, 2, 2},
4341     {2, 1, 1}
4342 };
4343
4344 /* Translate a NEON load/store element instruction.  Return nonzero if the
4345    instruction is invalid.  */
4346 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
4347 {
4348     int rd, rn, rm;
4349     int op;
4350     int nregs;
4351     int interleave;
4352     int spacing;
4353     int stride;
4354     int size;
4355     int reg;
4356     int pass;
4357     int load;
4358     int shift;
4359     int n;
4360     TCGv_i32 addr;
4361     TCGv_i32 tmp;
4362     TCGv_i32 tmp2;
4363     TCGv_i64 tmp64;
4364
4365     /* FIXME: this access check should not take precedence over UNDEF
4366      * for invalid encodings; we will generate incorrect syndrome information
4367      * for attempts to execute invalid vfp/neon encodings with FP disabled.
4368      */
4369     if (s->fp_excp_el) {
4370         gen_exception_insn(s, 4, EXCP_UDEF,
4371                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
4372         return 0;
4373     }
4374
4375     if (!s->vfp_enabled)
4376         return 1;
4377     VFP_DREG_D(rd, insn);
4378     rn = (insn >> 16) & 0xf;
4379     rm = insn & 0xf;
4380     load = (insn & (1 << 21)) != 0;
4381     if ((insn & (1 << 23)) == 0) {
4382         /* Load store all elements.  */
4383         op = (insn >> 8) & 0xf;
4384         size = (insn >> 6) & 3;
4385         if (op > 10)
4386             return 1;
4387         /* Catch UNDEF cases for bad values of align field */
4388         switch (op & 0xc) {
4389         case 4:
4390             if (((insn >> 5) & 1) == 1) {
4391                 return 1;
4392             }
4393             break;
4394         case 8:
4395             if (((insn >> 4) & 3) == 3) {
4396                 return 1;
4397             }
4398             break;
4399         default:
4400             break;
4401         }
4402         nregs = neon_ls_element_type[op].nregs;
4403         interleave = neon_ls_element_type[op].interleave;
4404         spacing = neon_ls_element_type[op].spacing;
4405         if (size == 3 && (interleave | spacing) != 1)
4406             return 1;
4407         addr = tcg_temp_new_i32();
4408         load_reg_var(s, addr, rn);
4409         stride = (1 << size) * interleave;
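         /* stride is the memory step between consecutive elements of the
          * same register when structures are interleaved.
          */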
4410         for (reg = 0; reg < nregs; reg++) {
4411             if (interleave > 2 || (interleave == 2 && nregs == 2)) {
4412                 load_reg_var(s, addr, rn);
4413                 tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
4414             } else if (interleave == 2 && nregs == 4 && reg == 2) {
4415                 load_reg_var(s, addr, rn);
4416                 tcg_gen_addi_i32(addr, addr, 1 << size);
4417             }
4418             if (size == 3) {
4419                 tmp64 = tcg_temp_new_i64();
4420                 if (load) {
4421                     gen_aa32_ld64(tmp64, addr, get_mem_index(s));
4422                     neon_store_reg64(tmp64, rd);
4423                 } else {
4424                     neon_load_reg64(tmp64, rd);
4425                     gen_aa32_st64(tmp64, addr, get_mem_index(s));
4426                 }
4427                 tcg_temp_free_i64(tmp64);
4428                 tcg_gen_addi_i32(addr, addr, stride);
4429             } else {
4430                 for (pass = 0; pass < 2; pass++) {
4431                     if (size == 2) {
4432                         if (load) {
4433                             tmp = tcg_temp_new_i32();
4434                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
4435                             neon_store_reg(rd, pass, tmp);
4436                         } else {
4437                             tmp = neon_load_reg(rd, pass);
4438                             gen_aa32_st32(tmp, addr, get_mem_index(s));
4439                             tcg_temp_free_i32(tmp);
4440                         }
4441                         tcg_gen_addi_i32(addr, addr, stride);
4442                     } else if (size == 1) {
4443                         if (load) {
4444                             tmp = tcg_temp_new_i32();
4445                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
4446                             tcg_gen_addi_i32(addr, addr, stride);
4447                             tmp2 = tcg_temp_new_i32();
4448                             gen_aa32_ld16u(tmp2, addr, get_mem_index(s));
4449                             tcg_gen_addi_i32(addr, addr, stride);
4450                             tcg_gen_shli_i32(tmp2, tmp2, 16);
4451                             tcg_gen_or_i32(tmp, tmp, tmp2);
4452                             tcg_temp_free_i32(tmp2);
4453                             neon_store_reg(rd, pass, tmp);
4454                         } else {
4455                             tmp = neon_load_reg(rd, pass);
4456                             tmp2 = tcg_temp_new_i32();
4457                             tcg_gen_shri_i32(tmp2, tmp, 16);
4458                             gen_aa32_st16(tmp, addr, get_mem_index(s));
4459                             tcg_temp_free_i32(tmp);
4460                             tcg_gen_addi_i32(addr, addr, stride);
4461                             gen_aa32_st16(tmp2, addr, get_mem_index(s));
4462                             tcg_temp_free_i32(tmp2);
4463                             tcg_gen_addi_i32(addr, addr, stride);
4464                         }
4465                     } else /* size == 0 */ {
4466                         if (load) {
4467                             TCGV_UNUSED_I32(tmp2);
4468                             for (n = 0; n < 4; n++) {
4469                                 tmp = tcg_temp_new_i32();
4470                                 gen_aa32_ld8u(tmp, addr, get_mem_index(s));
4471                                 tcg_gen_addi_i32(addr, addr, stride);
4472                                 if (n == 0) {
4473                                     tmp2 = tmp;
4474                                 } else {
4475                                     tcg_gen_shli_i32(tmp, tmp, n * 8);
4476                                     tcg_gen_or_i32(tmp2, tmp2, tmp);
4477                                     tcg_temp_free_i32(tmp);
4478                                 }
4479                             }
4480                             neon_store_reg(rd, pass, tmp2);
4481                         } else {
4482                             tmp2 = neon_load_reg(rd, pass);
4483                             for (n = 0; n < 4; n++) {
4484                                 tmp = tcg_temp_new_i32();
4485                                 if (n == 0) {
4486                                     tcg_gen_mov_i32(tmp, tmp2);
4487                                 } else {
4488                                     tcg_gen_shri_i32(tmp, tmp2, n * 8);
4489                                 }
4490                                 gen_aa32_st8(tmp, addr, get_mem_index(s));
4491                                 tcg_temp_free_i32(tmp);
4492                                 tcg_gen_addi_i32(addr, addr, stride);
4493                             }
4494                             tcg_temp_free_i32(tmp2);
4495                         }
4496                     }
4497                 }
4498             }
4499             rd += spacing;
4500         }
4501         tcg_temp_free_i32(addr);
4502         stride = nregs * 8;
4503     } else {
4504         size = (insn >> 10) & 3;
4505         if (size == 3) {
4506             /* Load single element to all lanes.  */
4507             int a = (insn >> 4) & 1;
4508             if (!load) {
4509                 return 1;
4510             }
4511             size = (insn >> 6) & 3;
4512             nregs = ((insn >> 8) & 3) + 1;
4513
4514             if (size == 3) {
4515                 if (nregs != 4 || a == 0) {
4516                     return 1;
4517                 }
4518                 /* For VLD4 size == 3, a == 1 means 32 bits at 16-byte alignment */
4519                 size = 2;
4520             }
4521             if (nregs == 1 && a == 1 && size == 0) {
4522                 return 1;
4523             }
4524             if (nregs == 3 && a == 1) {
4525                 return 1;
4526             }
4527             addr = tcg_temp_new_i32();
4528             load_reg_var(s, addr, rn);
4529             if (nregs == 1) {
4530                 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
4531                 tmp = gen_load_and_replicate(s, addr, size);
4532                 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4533                 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4534                 if (insn & (1 << 5)) {
4535                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
4536                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
4537                 }
4538                 tcg_temp_free_i32(tmp);
4539             } else {
4540                 /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
4541                 stride = (insn & (1 << 5)) ? 2 : 1;
4542                 for (reg = 0; reg < nregs; reg++) {
4543                     tmp = gen_load_and_replicate(s, addr, size);
4544                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4545                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4546                     tcg_temp_free_i32(tmp);
4547                     tcg_gen_addi_i32(addr, addr, 1 << size);
4548                     rd += stride;
4549                 }
4550             }
4551             tcg_temp_free_i32(addr);
4552             stride = (1 << size) * nregs;
4553         } else {
4554             /* Single element.  */
4555             int idx = (insn >> 4) & 0xf;
4556             pass = (insn >> 7) & 1;
4557             switch (size) {
4558             case 0:
4559                 shift = ((insn >> 5) & 3) * 8;
4560                 stride = 1;
4561                 break;
4562             case 1:
4563                 shift = ((insn >> 6) & 1) * 16;
4564                 stride = (insn & (1 << 5)) ? 2 : 1;
4565                 break;
4566             case 2:
4567                 shift = 0;
4568                 stride = (insn & (1 << 6)) ? 2 : 1;
4569                 break;
4570             default:
4571                 abort();
4572             }
4573             nregs = ((insn >> 8) & 3) + 1;
4574             /* Catch the UNDEF cases. This is unavoidably a bit messy. */
4575             switch (nregs) {
4576             case 1:
4577                 if (((idx & (1 << size)) != 0) ||
4578                     (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
4579                     return 1;
4580                 }
4581                 break;
4582             case 3:
4583                 if ((idx & 1) != 0) {
4584                     return 1;
4585                 }
4586                 /* fall through */
4587             case 2:
4588                 if (size == 2 && (idx & 2) != 0) {
4589                     return 1;
4590                 }
4591                 break;
4592             case 4:
4593                 if ((size == 2) && ((idx & 3) == 3)) {
4594                     return 1;
4595                 }
4596                 break;
4597             default:
4598                 abort();
4599             }
4600             if ((rd + stride * (nregs - 1)) > 31) {
4601                 /* Attempts to write off the end of the register file
4602                  * are UNPREDICTABLE; we choose to UNDEF because otherwise
4603                  * the neon_load_reg() would write off the end of the array.
4604                  */
4605                 return 1;
4606             }
4607             addr = tcg_temp_new_i32();
4608             load_reg_var(s, addr, rn);
4609             for (reg = 0; reg < nregs; reg++) {
4610                 if (load) {
4611                     tmp = tcg_temp_new_i32();
4612                     switch (size) {
4613                     case 0:
4614                         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
4615                         break;
4616                     case 1:
4617                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
4618                         break;
4619                     case 2:
4620                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
4621                         break;
4622                     default: /* Avoid compiler warnings.  */
4623                         abort();
4624                     }
4625                     if (size != 2) {
4626                         tmp2 = neon_load_reg(rd, pass);
4627                         tcg_gen_deposit_i32(tmp, tmp2, tmp,
4628                                             shift, size ? 16 : 8);
4629                         tcg_temp_free_i32(tmp2);
4630                     }
4631                     neon_store_reg(rd, pass, tmp);
4632                 } else { /* Store */
4633                     tmp = neon_load_reg(rd, pass);
4634                     if (shift)
4635                         tcg_gen_shri_i32(tmp, tmp, shift);
4636                     switch (size) {
4637                     case 0:
4638                         gen_aa32_st8(tmp, addr, get_mem_index(s));
4639                         break;
4640                     case 1:
4641                         gen_aa32_st16(tmp, addr, get_mem_index(s));
4642                         break;
4643                     case 2:
4644                         gen_aa32_st32(tmp, addr, get_mem_index(s));
4645                         break;
4646                     }
4647                     tcg_temp_free_i32(tmp);
4648                 }
4649                 rd += stride;
4650                 tcg_gen_addi_i32(addr, addr, 1 << size);
4651             }
4652             tcg_temp_free_i32(addr);
4653             stride = nregs * (1 << size);
4654         }
4655     }
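    /* Base register writeback: rm == 15 means none, rm == 13 means advance
     * by the transfer size computed in stride, and any other rm means
     * advance by that register's value.
     */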
4656     if (rm != 15) {
4657         TCGv_i32 base;
4658
4659         base = load_reg(s, rn);
4660         if (rm == 13) {
4661             tcg_gen_addi_i32(base, base, stride);
4662         } else {
4663             TCGv_i32 index;
4664             index = load_reg(s, rm);
4665             tcg_gen_add_i32(base, base, index);
4666             tcg_temp_free_i32(index);
4667         }
4668         store_reg(s, rn, base);
4669     }
4670     return 0;
4671 }
4672
4673 /* Bitwise select.  dest = c ? t : f.  Clobbers t and f.  */
4674 static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
4675 {
4676     tcg_gen_and_i32(t, t, c);
4677     tcg_gen_andc_i32(f, f, c);
4678     tcg_gen_or_i32(dest, t, f);
4679 }
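/* Equivalently dest = (t & c) | (f & ~c).  VBSL uses this directly; VBIT and
 * VBIF are handled by permuting the operands at the call site (see the
 * NEON_3R_LOGIC cases below).
 */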
4680
4681 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
4682 {
4683     switch (size) {
4684     case 0: gen_helper_neon_narrow_u8(dest, src); break;
4685     case 1: gen_helper_neon_narrow_u16(dest, src); break;
4686     case 2: tcg_gen_trunc_i64_i32(dest, src); break;
4687     default: abort();
4688     }
4689 }
4690
4691 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4692 {
4693     switch (size) {
4694     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4695     case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4696     case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
4697     default: abort();
4698     }
4699 }
4700
4701 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
4702 {
4703     switch (size) {
4704     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4705     case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4706     case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
4707     default: abort();
4708     }
4709 }
4710
4711 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4712 {
4713     switch (size) {
4714     case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4715     case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4716     case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
4717     default: abort();
4718     }
4719 }
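/* The narrowing helpers above cover VMOVN-style plain truncation,
 * VQMOVN-style saturation, and VQMOVUN-style saturation of a signed
 * input to an unsigned result.
 */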
4720
4721 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
4722                                          int q, int u)
4723 {
4724     if (q) {
4725         if (u) {
4726             switch (size) {
4727             case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4728             case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4729             default: abort();
4730             }
4731         } else {
4732             switch (size) {
4733             case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4734             case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4735             default: abort();
4736             }
4737         }
4738     } else {
4739         if (u) {
4740             switch (size) {
4741             case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4742             case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4743             default: abort();
4744             }
4745         } else {
4746             switch (size) {
4747             case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4748             case 2: gen_helper_neon_shl_s32(var, var, shift); break;
4749             default: abort();
4750             }
4751         }
4752     }
4753 }
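/* Note there is no size 0 case here: callers narrow from elements of at
 * least 16 bits, having incremented size beforehand.
 */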
4754
4755 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
4756 {
4757     if (u) {
4758         switch (size) {
4759         case 0: gen_helper_neon_widen_u8(dest, src); break;
4760         case 1: gen_helper_neon_widen_u16(dest, src); break;
4761         case 2: tcg_gen_extu_i32_i64(dest, src); break;
4762         default: abort();
4763         }
4764     } else {
4765         switch (size) {
4766         case 0: gen_helper_neon_widen_s8(dest, src); break;
4767         case 1: gen_helper_neon_widen_s16(dest, src); break;
4768         case 2: tcg_gen_ext_i32_i64(dest, src); break;
4769         default: abort();
4770         }
4771     }
4772     tcg_temp_free_i32(src);
4773 }
4774
4775 static inline void gen_neon_addl(int size)
4776 {
4777     switch (size) {
4778     case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4779     case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4780     case 2: tcg_gen_add_i64(CPU_V001); break;
4781     default: abort();
4782     }
4783 }
4784
4785 static inline void gen_neon_subl(int size)
4786 {
4787     switch (size) {
4788     case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4789     case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4790     case 2: tcg_gen_sub_i64(CPU_V001); break;
4791     default: abort();
4792     }
4793 }
4794
4795 static inline void gen_neon_negl(TCGv_i64 var, int size)
4796 {
4797     switch (size) {
4798     case 0: gen_helper_neon_negl_u16(var, var); break;
4799     case 1: gen_helper_neon_negl_u32(var, var); break;
4800     case 2:
4801         tcg_gen_neg_i64(var, var);
4802         break;
4803     default: abort();
4804     }
4805 }
4806
4807 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4808 {
4809     switch (size) {
4810     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4811     case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4812     default: abort();
4813     }
4814 }
4815
4816 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
4817                                  int size, int u)
4818 {
4819     TCGv_i64 tmp;
4820
4821     switch ((size << 1) | u) {
4822     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4823     case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4824     case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4825     case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4826     case 4:
4827         tmp = gen_muls_i64_i32(a, b);
4828         tcg_gen_mov_i64(dest, tmp);
4829         tcg_temp_free_i64(tmp);
4830         break;
4831     case 5:
4832         tmp = gen_mulu_i64_i32(a, b);
4833         tcg_gen_mov_i64(dest, tmp);
4834         tcg_temp_free_i64(tmp);
4835         break;
4836     default: abort();
4837     }
4838
4839     /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4840        Don't forget to clean them now.  */
4841     if (size < 2) {
4842         tcg_temp_free_i32(a);
4843         tcg_temp_free_i32(b);
4844     }
4845 }
4846
4847 static void gen_neon_narrow_op(int op, int u, int size,
4848                                TCGv_i32 dest, TCGv_i64 src)
4849 {
4850     if (op) {
4851         if (u) {
4852             gen_neon_unarrow_sats(size, dest, src);
4853         } else {
4854             gen_neon_narrow(size, dest, src);
4855         }
4856     } else {
4857         if (u) {
4858             gen_neon_narrow_satu(size, dest, src);
4859         } else {
4860             gen_neon_narrow_sats(size, dest, src);
4861         }
4862     }
4863 }
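/* Mapping of the (op, u) flags: op && u is a signed-to-unsigned saturating
 * narrow (e.g. VQSHRUN), op && !u a plain narrow (e.g. VSHRN), and !op a
 * saturating narrow with u selecting the unsigned variant (VQSHRN/VQMOVN).
 */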
4864
4865 /* Symbolic constants for op fields for Neon 3-register same-length.
4866  * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
4867  * table A7-9.
4868  */
4869 #define NEON_3R_VHADD 0
4870 #define NEON_3R_VQADD 1
4871 #define NEON_3R_VRHADD 2
4872 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4873 #define NEON_3R_VHSUB 4
4874 #define NEON_3R_VQSUB 5
4875 #define NEON_3R_VCGT 6
4876 #define NEON_3R_VCGE 7
4877 #define NEON_3R_VSHL 8
4878 #define NEON_3R_VQSHL 9
4879 #define NEON_3R_VRSHL 10
4880 #define NEON_3R_VQRSHL 11
4881 #define NEON_3R_VMAX 12
4882 #define NEON_3R_VMIN 13
4883 #define NEON_3R_VABD 14
4884 #define NEON_3R_VABA 15
4885 #define NEON_3R_VADD_VSUB 16
4886 #define NEON_3R_VTST_VCEQ 17
4887 #define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
4888 #define NEON_3R_VMUL 19
4889 #define NEON_3R_VPMAX 20
4890 #define NEON_3R_VPMIN 21
4891 #define NEON_3R_VQDMULH_VQRDMULH 22
4892 #define NEON_3R_VPADD 23
4893 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
4894 #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
4895 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4896 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4897 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4898 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4899 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4900 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
4901
4902 static const uint8_t neon_3r_sizes[] = {
4903     [NEON_3R_VHADD] = 0x7,
4904     [NEON_3R_VQADD] = 0xf,
4905     [NEON_3R_VRHADD] = 0x7,
4906     [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4907     [NEON_3R_VHSUB] = 0x7,
4908     [NEON_3R_VQSUB] = 0xf,
4909     [NEON_3R_VCGT] = 0x7,
4910     [NEON_3R_VCGE] = 0x7,
4911     [NEON_3R_VSHL] = 0xf,
4912     [NEON_3R_VQSHL] = 0xf,
4913     [NEON_3R_VRSHL] = 0xf,
4914     [NEON_3R_VQRSHL] = 0xf,
4915     [NEON_3R_VMAX] = 0x7,
4916     [NEON_3R_VMIN] = 0x7,
4917     [NEON_3R_VABD] = 0x7,
4918     [NEON_3R_VABA] = 0x7,
4919     [NEON_3R_VADD_VSUB] = 0xf,
4920     [NEON_3R_VTST_VCEQ] = 0x7,
4921     [NEON_3R_VML] = 0x7,
4922     [NEON_3R_VMUL] = 0x7,
4923     [NEON_3R_VPMAX] = 0x7,
4924     [NEON_3R_VPMIN] = 0x7,
4925     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4926     [NEON_3R_VPADD] = 0x7,
4927     [NEON_3R_SHA] = 0xf, /* size field encodes op type */
4928     [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
4929     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4930     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4931     [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4932     [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4933     [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4934     [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
4935 };
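/* The decoder below UNDEFs when (neon_3r_sizes[op] & (1 << size)) is zero:
 * 0x7 permits the 8/16/32-bit element sizes, 0xf additionally permits
 * size 3 (64-bit), and 0x5 leaves size bit 1 free to encode the operation.
 */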
4936
4937 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
4938  * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
4939  * table A7-13.
4940  */
4941 #define NEON_2RM_VREV64 0
4942 #define NEON_2RM_VREV32 1
4943 #define NEON_2RM_VREV16 2
4944 #define NEON_2RM_VPADDL 4
4945 #define NEON_2RM_VPADDL_U 5
4946 #define NEON_2RM_AESE 6 /* Includes AESD */
4947 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
4948 #define NEON_2RM_VCLS 8
4949 #define NEON_2RM_VCLZ 9
4950 #define NEON_2RM_VCNT 10
4951 #define NEON_2RM_VMVN 11
4952 #define NEON_2RM_VPADAL 12
4953 #define NEON_2RM_VPADAL_U 13
4954 #define NEON_2RM_VQABS 14
4955 #define NEON_2RM_VQNEG 15
4956 #define NEON_2RM_VCGT0 16
4957 #define NEON_2RM_VCGE0 17
4958 #define NEON_2RM_VCEQ0 18
4959 #define NEON_2RM_VCLE0 19
4960 #define NEON_2RM_VCLT0 20
4961 #define NEON_2RM_SHA1H 21
4962 #define NEON_2RM_VABS 22
4963 #define NEON_2RM_VNEG 23
4964 #define NEON_2RM_VCGT0_F 24
4965 #define NEON_2RM_VCGE0_F 25
4966 #define NEON_2RM_VCEQ0_F 26
4967 #define NEON_2RM_VCLE0_F 27
4968 #define NEON_2RM_VCLT0_F 28
4969 #define NEON_2RM_VABS_F 30
4970 #define NEON_2RM_VNEG_F 31
4971 #define NEON_2RM_VSWP 32
4972 #define NEON_2RM_VTRN 33
4973 #define NEON_2RM_VUZP 34
4974 #define NEON_2RM_VZIP 35
4975 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
4976 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
4977 #define NEON_2RM_VSHLL 38
4978 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
4979 #define NEON_2RM_VRINTN 40
4980 #define NEON_2RM_VRINTX 41
4981 #define NEON_2RM_VRINTA 42
4982 #define NEON_2RM_VRINTZ 43
4983 #define NEON_2RM_VCVT_F16_F32 44
4984 #define NEON_2RM_VRINTM 45
4985 #define NEON_2RM_VCVT_F32_F16 46
4986 #define NEON_2RM_VRINTP 47
4987 #define NEON_2RM_VCVTAU 48
4988 #define NEON_2RM_VCVTAS 49
4989 #define NEON_2RM_VCVTNU 50
4990 #define NEON_2RM_VCVTNS 51
4991 #define NEON_2RM_VCVTPU 52
4992 #define NEON_2RM_VCVTPS 53
4993 #define NEON_2RM_VCVTMU 54
4994 #define NEON_2RM_VCVTMS 55
4995 #define NEON_2RM_VRECPE 56
4996 #define NEON_2RM_VRSQRTE 57
4997 #define NEON_2RM_VRECPE_F 58
4998 #define NEON_2RM_VRSQRTE_F 59
4999 #define NEON_2RM_VCVT_FS 60
5000 #define NEON_2RM_VCVT_FU 61
5001 #define NEON_2RM_VCVT_SF 62
5002 #define NEON_2RM_VCVT_UF 63
5003
5004 static int neon_2rm_is_float_op(int op)
5005 {
5006     /* Return true if this neon 2reg-misc op is float-to-float */
5007     return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
5008             (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
5009             op == NEON_2RM_VRINTM ||
5010             (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
5011             op >= NEON_2RM_VRECPE_F);
5012 }
5013
5014 /* Each entry in this array has bit n set if the insn allows
5015  * size value n (otherwise it will UNDEF). Since unallocated
5016  * op values will have no bits set they always UNDEF.
5017  */
5018 static const uint8_t neon_2rm_sizes[] = {
5019     [NEON_2RM_VREV64] = 0x7,
5020     [NEON_2RM_VREV32] = 0x3,
5021     [NEON_2RM_VREV16] = 0x1,
5022     [NEON_2RM_VPADDL] = 0x7,
5023     [NEON_2RM_VPADDL_U] = 0x7,
5024     [NEON_2RM_AESE] = 0x1,
5025     [NEON_2RM_AESMC] = 0x1,
5026     [NEON_2RM_VCLS] = 0x7,
5027     [NEON_2RM_VCLZ] = 0x7,
5028     [NEON_2RM_VCNT] = 0x1,
5029     [NEON_2RM_VMVN] = 0x1,
5030     [NEON_2RM_VPADAL] = 0x7,
5031     [NEON_2RM_VPADAL_U] = 0x7,
5032     [NEON_2RM_VQABS] = 0x7,
5033     [NEON_2RM_VQNEG] = 0x7,
5034     [NEON_2RM_VCGT0] = 0x7,
5035     [NEON_2RM_VCGE0] = 0x7,
5036     [NEON_2RM_VCEQ0] = 0x7,
5037     [NEON_2RM_VCLE0] = 0x7,
5038     [NEON_2RM_VCLT0] = 0x7,
5039     [NEON_2RM_SHA1H] = 0x4,
5040     [NEON_2RM_VABS] = 0x7,
5041     [NEON_2RM_VNEG] = 0x7,
5042     [NEON_2RM_VCGT0_F] = 0x4,
5043     [NEON_2RM_VCGE0_F] = 0x4,
5044     [NEON_2RM_VCEQ0_F] = 0x4,
5045     [NEON_2RM_VCLE0_F] = 0x4,
5046     [NEON_2RM_VCLT0_F] = 0x4,
5047     [NEON_2RM_VABS_F] = 0x4,
5048     [NEON_2RM_VNEG_F] = 0x4,
5049     [NEON_2RM_VSWP] = 0x1,
5050     [NEON_2RM_VTRN] = 0x7,
5051     [NEON_2RM_VUZP] = 0x7,
5052     [NEON_2RM_VZIP] = 0x7,
5053     [NEON_2RM_VMOVN] = 0x7,
5054     [NEON_2RM_VQMOVN] = 0x7,
5055     [NEON_2RM_VSHLL] = 0x7,
5056     [NEON_2RM_SHA1SU1] = 0x4,
5057     [NEON_2RM_VRINTN] = 0x4,
5058     [NEON_2RM_VRINTX] = 0x4,
5059     [NEON_2RM_VRINTA] = 0x4,
5060     [NEON_2RM_VRINTZ] = 0x4,
5061     [NEON_2RM_VCVT_F16_F32] = 0x2,
5062     [NEON_2RM_VRINTM] = 0x4,
5063     [NEON_2RM_VCVT_F32_F16] = 0x2,
5064     [NEON_2RM_VRINTP] = 0x4,
5065     [NEON_2RM_VCVTAU] = 0x4,
5066     [NEON_2RM_VCVTAS] = 0x4,
5067     [NEON_2RM_VCVTNU] = 0x4,
5068     [NEON_2RM_VCVTNS] = 0x4,
5069     [NEON_2RM_VCVTPU] = 0x4,
5070     [NEON_2RM_VCVTPS] = 0x4,
5071     [NEON_2RM_VCVTMU] = 0x4,
5072     [NEON_2RM_VCVTMS] = 0x4,
5073     [NEON_2RM_VRECPE] = 0x4,
5074     [NEON_2RM_VRSQRTE] = 0x4,
5075     [NEON_2RM_VRECPE_F] = 0x4,
5076     [NEON_2RM_VRSQRTE_F] = 0x4,
5077     [NEON_2RM_VCVT_FS] = 0x4,
5078     [NEON_2RM_VCVT_FU] = 0x4,
5079     [NEON_2RM_VCVT_SF] = 0x4,
5080     [NEON_2RM_VCVT_UF] = 0x4,
5081 };
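/* As above, decode UNDEFs when (neon_2rm_sizes[op] & (1 << size)) is zero;
 * the 0x4 entries require size == 2, i.e. 32-bit elements, as used by the
 * float and crypto ops.
 */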
5082
5083 /* Translate a NEON data processing instruction.  Return nonzero if the
5084    instruction is invalid.
5085    We process data in a mixture of 32-bit and 64-bit chunks.
5086    Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
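/* A 64-bit D register is processed as two 32-bit passes and a 128-bit Q
 * register as four, hence the recurring "pass < (q ? 4 : 2)" loops.
 */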
5087
5088 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5089 {
5090     int op;
5091     int q;
5092     int rd, rn, rm;
5093     int size;
5094     int shift;
5095     int pass;
5096     int count;
5097     int pairwise;
5098     int u;
5099     uint32_t imm, mask;
5100     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5101     TCGv_i64 tmp64;
5102
5103     /* FIXME: this access check should not take precedence over UNDEF
5104      * for invalid encodings; we will generate incorrect syndrome information
5105      * for attempts to execute invalid vfp/neon encodings with FP disabled.
5106      */
5107     if (s->fp_excp_el) {
5108         gen_exception_insn(s, 4, EXCP_UDEF,
5109                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
5110         return 0;
5111     }
5112
5113     if (!s->vfp_enabled)
5114       return 1;
5115     q = (insn & (1 << 6)) != 0;
5116     u = (insn >> 24) & 1;
5117     VFP_DREG_D(rd, insn);
5118     VFP_DREG_N(rn, insn);
5119     VFP_DREG_M(rm, insn);
5120     size = (insn >> 20) & 3;
5121     if ((insn & (1 << 23)) == 0) {
5122         /* Three register same length.  */
5123         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5124         /* Catch invalid op and bad size combinations: UNDEF */
5125         if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5126             return 1;
5127         }
5128         /* All insns of this form UNDEF for either this condition or the
5129          * superset of cases "Q==1"; we catch the latter later.
5130          */
5131         if (q && ((rd | rn | rm) & 1)) {
5132             return 1;
5133         }
5134         /*
5135          * The SHA-1/SHA-256 3-register instructions require special treatment
5136          * here, as their size field is overloaded as an op type selector, and
5137          * they all consume their input in a single pass.
5138          */
5139         if (op == NEON_3R_SHA) {
5140             if (!q) {
5141                 return 1;
5142             }
5143             if (!u) { /* SHA-1 */
5144                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
5145                     return 1;
5146                 }
5147                 tmp = tcg_const_i32(rd);
5148                 tmp2 = tcg_const_i32(rn);
5149                 tmp3 = tcg_const_i32(rm);
5150                 tmp4 = tcg_const_i32(size);
5151                 gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
5152                 tcg_temp_free_i32(tmp4);
5153             } else { /* SHA-256 */
5154                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
5155                     return 1;
5156                 }
5157                 tmp = tcg_const_i32(rd);
5158                 tmp2 = tcg_const_i32(rn);
5159                 tmp3 = tcg_const_i32(rm);
5160                 switch (size) {
5161                 case 0:
5162                     gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
5163                     break;
5164                 case 1:
5165                     gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
5166                     break;
5167                 case 2:
5168                     gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
5169                     break;
5170                 }
5171             }
5172             tcg_temp_free_i32(tmp);
5173             tcg_temp_free_i32(tmp2);
5174             tcg_temp_free_i32(tmp3);
5175             return 0;
5176         }
5177         if (size == 3 && op != NEON_3R_LOGIC) {
5178             /* 64-bit element instructions. */
5179             for (pass = 0; pass < (q ? 2 : 1); pass++) {
5180                 neon_load_reg64(cpu_V0, rn + pass);
5181                 neon_load_reg64(cpu_V1, rm + pass);
5182                 switch (op) {
5183                 case NEON_3R_VQADD:
5184                     if (u) {
5185                         gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
5186                                                  cpu_V0, cpu_V1);
5187                     } else {
5188                         gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
5189                                                  cpu_V0, cpu_V1);
5190                     }
5191                     break;
5192                 case NEON_3R_VQSUB:
5193                     if (u) {
5194                         gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
5195                                                  cpu_V0, cpu_V1);
5196                     } else {
5197                         gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
5198                                                  cpu_V0, cpu_V1);
5199                     }
5200                     break;
5201                 case NEON_3R_VSHL:
5202                     if (u) {
5203                         gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
5204                     } else {
5205                         gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
5206                     }
5207                     break;
5208                 case NEON_3R_VQSHL:
5209                     if (u) {
5210                         gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5211                                                  cpu_V1, cpu_V0);
5212                     } else {
5213                         gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5214                                                  cpu_V1, cpu_V0);
5215                     }
5216                     break;
5217                 case NEON_3R_VRSHL:
5218                     if (u) {
5219                         gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5220                     } else {
5221                         gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5222                     }
5223                     break;
5224                 case NEON_3R_VQRSHL:
5225                     if (u) {
5226                         gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5227                                                   cpu_V1, cpu_V0);
5228                     } else {
5229                         gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5230                                                   cpu_V1, cpu_V0);
5231                     }
5232                     break;
5233                 case NEON_3R_VADD_VSUB:
5234                     if (u) {
5235                         tcg_gen_sub_i64(CPU_V001);
5236                     } else {
5237                         tcg_gen_add_i64(CPU_V001);
5238                     }
5239                     break;
5240                 default:
5241                     abort();
5242                 }
5243                 neon_store_reg64(cpu_V0, rd + pass);
5244             }
5245             return 0;
5246         }
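        /* From here on elements are 8, 16 or 32 bits wide and are handled
         * 32 bits per pass.
         */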
5247         pairwise = 0;
5248         switch (op) {
5249         case NEON_3R_VSHL:
5250         case NEON_3R_VQSHL:
5251         case NEON_3R_VRSHL:
5252         case NEON_3R_VQRSHL:
5253             {
5254                 int rtmp;
5255                 /* Shift operands are reversed: data in Vm, count in Vn.  */
5256                 rtmp = rn;
5257                 rn = rm;
5258                 rm = rtmp;
5259             }
5260             break;
5261         case NEON_3R_VPADD:
5262             if (u) {
5263                 return 1;
5264             }
5265             /* Fall through */
5266         case NEON_3R_VPMAX:
5267         case NEON_3R_VPMIN:
5268             pairwise = 1;
5269             break;
5270         case NEON_3R_FLOAT_ARITH:
5271             pairwise = (u && size < 2); /* if VPADD (float) */
5272             break;
5273         case NEON_3R_FLOAT_MINMAX:
5274             pairwise = u; /* if VPMIN/VPMAX (float) */
5275             break;
5276         case NEON_3R_FLOAT_CMP:
5277             if (!u && size) {
5278                 /* no encoding for U=0 C=1x */
5279                 return 1;
5280             }
5281             break;
5282         case NEON_3R_FLOAT_ACMP:
5283             if (!u) {
5284                 return 1;
5285             }
5286             break;
5287         case NEON_3R_FLOAT_MISC:
5288             /* VMAXNM/VMINNM in ARMv8 */
5289             if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5290                 return 1;
5291             }
5292             break;
5293         case NEON_3R_VMUL:
5294             if (u && (size != 0)) {
5295                 /* UNDEF on invalid size for polynomial subcase */
5296                 return 1;
5297             }
5298             break;
5299         case NEON_3R_VFM:
5300             if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
5301                 return 1;
5302             }
5303             break;
5304         default:
5305             break;
5306         }
5307
5308         if (pairwise && q) {
5309             /* All the pairwise insns UNDEF if Q is set */
5310             return 1;
5311         }
5312
5313         for (pass = 0; pass < (q ? 4 : 2); pass++) {
5314
5315         if (pairwise) {
5316             /* Pairwise.  */
5317             if (pass < 1) {
5318                 tmp = neon_load_reg(rn, 0);
5319                 tmp2 = neon_load_reg(rn, 1);
5320             } else {
5321                 tmp = neon_load_reg(rm, 0);
5322                 tmp2 = neon_load_reg(rm, 1);
5323             }
5324         } else {
5325             /* Elementwise.  */
5326             tmp = neon_load_reg(rn, pass);
5327             tmp2 = neon_load_reg(rm, pass);
5328         }
5329         switch (op) {
5330         case NEON_3R_VHADD:
5331             GEN_NEON_INTEGER_OP(hadd);
5332             break;
5333         case NEON_3R_VQADD:
5334             GEN_NEON_INTEGER_OP_ENV(qadd);
5335             break;
5336         case NEON_3R_VRHADD:
5337             GEN_NEON_INTEGER_OP(rhadd);
5338             break;
5339         case NEON_3R_LOGIC: /* Logic ops.  */
5340             switch ((u << 2) | size) {
5341             case 0: /* VAND */
5342                 tcg_gen_and_i32(tmp, tmp, tmp2);
5343                 break;
5344             case 1: /* BIC */
5345                 tcg_gen_andc_i32(tmp, tmp, tmp2);
5346                 break;
5347             case 2: /* VORR */
5348                 tcg_gen_or_i32(tmp, tmp, tmp2);
5349                 break;
5350             case 3: /* VORN */
5351                 tcg_gen_orc_i32(tmp, tmp, tmp2);
5352                 break;
5353             case 4: /* VEOR */
5354                 tcg_gen_xor_i32(tmp, tmp, tmp2);
5355                 break;
5356             case 5: /* VBSL */
5357                 tmp3 = neon_load_reg(rd, pass);
5358                 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
5359                 tcg_temp_free_i32(tmp3);
5360                 break;
5361             case 6: /* VBIT */
5362                 tmp3 = neon_load_reg(rd, pass);
5363                 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
5364                 tcg_temp_free_i32(tmp3);
5365                 break;
5366             case 7: /* VBIF */
5367                 tmp3 = neon_load_reg(rd, pass);
5368                 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
5369                 tcg_temp_free_i32(tmp3);
5370                 break;
5371             }
5372             break;
5373         case NEON_3R_VHSUB:
5374             GEN_NEON_INTEGER_OP(hsub);
5375             break;
5376         case NEON_3R_VQSUB:
5377             GEN_NEON_INTEGER_OP_ENV(qsub);
5378             break;
5379         case NEON_3R_VCGT:
5380             GEN_NEON_INTEGER_OP(cgt);
5381             break;
5382         case NEON_3R_VCGE:
5383             GEN_NEON_INTEGER_OP(cge);
5384             break;
5385         case NEON_3R_VSHL:
5386             GEN_NEON_INTEGER_OP(shl);
5387             break;
5388         case NEON_3R_VQSHL:
5389             GEN_NEON_INTEGER_OP_ENV(qshl);
5390             break;
5391         case NEON_3R_VRSHL:
5392             GEN_NEON_INTEGER_OP(rshl);
5393             break;
5394         case NEON_3R_VQRSHL:
5395             GEN_NEON_INTEGER_OP_ENV(qrshl);
5396             break;
5397         case NEON_3R_VMAX:
5398             GEN_NEON_INTEGER_OP(max);
5399             break;
5400         case NEON_3R_VMIN:
5401             GEN_NEON_INTEGER_OP(min);
5402             break;
5403         case NEON_3R_VABD:
5404             GEN_NEON_INTEGER_OP(abd);
5405             break;
5406         case NEON_3R_VABA:
5407             GEN_NEON_INTEGER_OP(abd);
5408             tcg_temp_free_i32(tmp2);
5409             tmp2 = neon_load_reg(rd, pass);
5410             gen_neon_add(size, tmp, tmp2);
5411             break;
5412         case NEON_3R_VADD_VSUB:
5413             if (!u) { /* VADD */
5414                 gen_neon_add(size, tmp, tmp2);
5415             } else { /* VSUB */
5416                 switch (size) {
5417                 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
5418                 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
5419                 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
5420                 default: abort();
5421                 }
5422             }
5423             break;
5424         case NEON_3R_VTST_VCEQ:
5425             if (!u) { /* VTST */
5426                 switch (size) {
5427                 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
5428                 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
5429                 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
5430                 default: abort();
5431                 }
5432             } else { /* VCEQ */
5433                 switch (size) {
5434                 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
5435                 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
5436                 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
5437                 default: abort();
5438                 }
5439             }
5440             break;
5441         case NEON_3R_VML: /* VMLA, VMLAL, VMLS, VMLSL */
5442             switch (size) {
5443             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5444             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5445             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5446             default: abort();
5447             }
5448             tcg_temp_free_i32(tmp2);
5449             tmp2 = neon_load_reg(rd, pass);
5450             if (u) { /* VMLS */
5451                 gen_neon_rsb(size, tmp, tmp2);
5452             } else { /* VMLA */
5453                 gen_neon_add(size, tmp, tmp2);
5454             }
5455             break;
5456         case NEON_3R_VMUL:
5457             if (u) { /* polynomial */
5458                 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
5459             } else { /* Integer */
5460                 switch (size) {
5461                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5462                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5463                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5464                 default: abort();
5465                 }
5466             }
5467             break;
5468         case NEON_3R_VPMAX:
5469             GEN_NEON_INTEGER_OP(pmax);
5470             break;
5471         case NEON_3R_VPMIN:
5472             GEN_NEON_INTEGER_OP(pmin);
5473             break;
5474         case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
5475             if (!u) { /* VQDMULH */
5476                 switch (size) {
5477                 case 1:
5478                     gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5479                     break;
5480                 case 2:
5481                     gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5482                     break;
5483                 default: abort();
5484                 }
5485             } else { /* VQRDMULH */
5486                 switch (size) {
5487                 case 1:
5488                     gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5489                     break;
5490                 case 2:
5491                     gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5492                     break;
5493                 default: abort();
5494                 }
5495             }
5496             break;
5497         case NEON_3R_VPADD:
5498             switch (size) {
5499             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5500             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5501             case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5502             default: abort();
5503             }
5504             break;
5505         case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5506         {
5507             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5508             switch ((u << 2) | size) {
5509             case 0: /* VADD */
5510             case 4: /* VPADD */
5511                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5512                 break;
5513             case 2: /* VSUB */
5514                 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5515                 break;
5516             case 6: /* VABD */
5517                 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5518                 break;
5519             default:
5520                 abort();
5521             }
5522             tcg_temp_free_ptr(fpstatus);
5523             break;
5524         }
5525         case NEON_3R_FLOAT_MULTIPLY:
5526         {
5527             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5528             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5529             if (!u) {
5530                 tcg_temp_free_i32(tmp2);
5531                 tmp2 = neon_load_reg(rd, pass);
5532                 if (size == 0) {
5533                     gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5534                 } else {
5535                     gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5536                 }
5537             }
5538             tcg_temp_free_ptr(fpstatus);
5539             break;
5540         }
5541         case NEON_3R_FLOAT_CMP:
5542         {
5543             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5544             if (!u) {
5545                 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5546             } else {
5547                 if (size == 0) {
5548                     gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5549                 } else {
5550                     gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5551                 }
5552             }
5553             tcg_temp_free_ptr(fpstatus);
5554             break;
5555         }
5556         case NEON_3R_FLOAT_ACMP:
5557         {
5558             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5559             if (size == 0) {
5560                 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5561             } else {
5562                 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5563             }
5564             tcg_temp_free_ptr(fpstatus);
5565             break;
5566         }
5567         case NEON_3R_FLOAT_MINMAX:
5568         {
5569             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5570             if (size == 0) {
5571                 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5572             } else {
5573                 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5574             }
5575             tcg_temp_free_ptr(fpstatus);
5576             break;
5577         }
5578         case NEON_3R_FLOAT_MISC:
5579             if (u) {
5580                 /* VMAXNM/VMINNM */
5581                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5582                 if (size == 0) {
5583                     gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5584                 } else {
5585                     gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5586                 }
5587                 tcg_temp_free_ptr(fpstatus);
5588             } else {
5589                 if (size == 0) {
5590                     gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5591                 } else {
5592                     gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5593                 }
5594             }
5595             break;
5596         case NEON_3R_VFM:
5597         {
5598             /* VFMA, VFMS: fused multiply-add */
5599             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5600             TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5601             if (size) {
5602                 /* VFMS */
5603                 gen_helper_vfp_negs(tmp, tmp);
5604             }
5605             gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5606             tcg_temp_free_i32(tmp3);
5607             tcg_temp_free_ptr(fpstatus);
5608             break;
5609         }
5610         default:
5611             abort();
5612         }
5613         tcg_temp_free_i32(tmp2);
5614
5615         /* Save the result.  For elementwise operations we can put it
5616            straight into the destination register.  For pairwise operations
5617            we have to be careful to avoid clobbering the source operands.  */
5618         if (pairwise && rd == rm) {
5619             neon_store_scratch(pass, tmp);
5620         } else {
5621             neon_store_reg(rd, pass, tmp);
5622         }
5623
5624         } /* for pass */
5625         if (pairwise && rd == rm) {
5626             for (pass = 0; pass < (q ? 4 : 2); pass++) {
5627                 tmp = neon_load_scratch(pass);
5628                 neon_store_reg(rd, pass, tmp);
5629             }
5630         }
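        /* Example of the hazard: for VPADD d0, d1, d0 the pass 0 result
         * would clobber d0 before pass 1 reads it as the rm operand.
         */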
5631         /* End of 3 register same size operations.  */
5632     } else if (insn & (1 << 4)) {
5633         if ((insn & 0x00380080) != 0) {
5634             /* Two registers and shift.  */
5635             op = (insn >> 8) & 0xf;
5636             if (insn & (1 << 7)) {
5637                 /* 64-bit shift. */
5638                 if (op > 7) {
5639                     return 1;
5640                 }
5641                 size = 3;
5642             } else {
5643                 size = 2;
5644                 while ((insn & (1 << (size + 19))) == 0)
5645                     size--;
5646             }
5647             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5648             /* To avoid excessive duplication of ops we implement shift
5649                by immediate using the variable shift operations.  */
5650             if (op < 8) {
5651                 /* Shift by immediate:
5652                    VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
5653                 if (q && ((rd | rm) & 1)) {
5654                     return 1;
5655                 }
5656                 if (!u && (op == 4 || op == 6)) {
5657                     return 1;
5658                 }
5659                 /* Right shifts are encoded as N - shift, where N is the
5660                    element size in bits.  */
5661                 if (op <= 4)
5662                     shift = shift - (1 << (size + 3));
5663                 if (size == 3) {
5664                     count = q + 1;
5665                 } else {
5666                     count = q ? 4 : 2;
5667                 }
5668                 switch (size) {
5669                 case 0:
5670                     imm = (uint8_t) shift;
5671                     imm |= imm << 8;
5672                     imm |= imm << 16;
5673                     break;
5674                 case 1:
5675                     imm = (uint16_t) shift;
5676                     imm |= imm << 16;
5677                     break;
5678                 case 2:
5679                 case 3:
5680                     imm = shift;
5681                     break;
5682                 default:
5683                     abort();
5684                 }
5685
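                /* For sizes 0 and 1 the shift count is now replicated into
                 * every 8/16-bit lane of imm, so a single 32-bit helper
                 * call below shifts all lanes at once.
                 */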
5686                 for (pass = 0; pass < count; pass++) {
5687                     if (size == 3) {
5688                         neon_load_reg64(cpu_V0, rm + pass);
5689                         tcg_gen_movi_i64(cpu_V1, imm);
5690                         switch (op) {
5691                         case 0:  /* VSHR */
5692                         case 1:  /* VSRA */
5693                             if (u)
5694                                 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5695                             else
5696                                 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
5697                             break;
5698                         case 2: /* VRSHR */
5699                         case 3: /* VRSRA */
5700                             if (u)
5701                                 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5702                             else
5703                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5704                             break;
5705                         case 4: /* VSRI */
5706                         case 5: /* VSHL, VSLI */
5707                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5708                             break;
5709                         case 6: /* VQSHLU */
5710                             gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5711                                                       cpu_V0, cpu_V1);
5712                             break;
5713                         case 7: /* VQSHL */
5714                             if (u) {
5715                                 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5716                                                          cpu_V0, cpu_V1);
5717                             } else {
5718                                 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5719                                                          cpu_V0, cpu_V1);
5720                             }
5721                             break;
5722                         }
5723                         if (op == 1 || op == 3) {
5724                             /* Accumulate.  */
5725                             neon_load_reg64(cpu_V1, rd + pass);
5726                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5727                         } else if (op == 4 || (op == 5 && u)) {
5728                             /* Insert */
5729                             uint64_t mask;
5730                             neon_load_reg64(cpu_V1, rd + pass);
5731                             if (shift < -63 || shift > 63) {
5732                                 mask = 0;
5733                             } else {
5734                                 if (op == 4) {
5735                                     mask = 0xffffffffffffffffull >> -shift;
5736                                 } else {
5737                                     mask = 0xffffffffffffffffull << shift;
5738                                 }
5739                             }
5740                             tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5741                             tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5742                         }
5743                         neon_store_reg64(cpu_V0, rd + pass);
5744                     } else { /* size < 3 */
5745                         /* Operands in tmp (value) and tmp2 (shift count).  */
5746                         tmp = neon_load_reg(rm, pass);
5747                         tmp2 = tcg_temp_new_i32();
5748                         tcg_gen_movi_i32(tmp2, imm);
5749                         switch (op) {
5750                         case 0:  /* VSHR */
5751                         case 1:  /* VSRA */
5752                             GEN_NEON_INTEGER_OP(shl);
5753                             break;
5754                         case 2: /* VRSHR */
5755                         case 3: /* VRSRA */
5756                             GEN_NEON_INTEGER_OP(rshl);
5757                             break;
5758                         case 4: /* VSRI */
5759                         case 5: /* VSHL, VSLI */
5760                             switch (size) {
5761                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5762                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5763                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5764                             default: abort();
5765                             }
5766                             break;
5767                         case 6: /* VQSHLU */
5768                             switch (size) {
5769                             case 0:
5770                                 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5771                                                          tmp, tmp2);
5772                                 break;
5773                             case 1:
5774                                 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5775                                                           tmp, tmp2);
5776                                 break;
5777                             case 2:
5778                                 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5779                                                           tmp, tmp2);
5780                                 break;
5781                             default:
5782                                 abort();
5783                             }
5784                             break;
5785                         case 7: /* VQSHL */
5786                             GEN_NEON_INTEGER_OP_ENV(qshl);
5787                             break;
5788                         }
5789                         tcg_temp_free_i32(tmp2);
5790
5791                         if (op == 1 || op == 3) {
5792                             /* Accumulate.  */
5793                             tmp2 = neon_load_reg(rd, pass);
5794                             gen_neon_add(size, tmp, tmp2);
5795                             tcg_temp_free_i32(tmp2);
5796                         } else if (op == 4 || (op == 5 && u)) {
5797                             /* Insert */
5798                             switch (size) {
5799                             case 0:
5800                                 if (op == 4)
5801                                     mask = 0xff >> -shift;
5802                                 else
5803                                     mask = (uint8_t)(0xff << shift);
5804                                 mask |= mask << 8;
5805                                 mask |= mask << 16;
5806                                 break;
5807                             case 1:
5808                                 if (op == 4)
5809                                     mask = 0xffff >> -shift;
5810                                 else
5811                                     mask = (uint16_t)(0xffff << shift);
5812                                 mask |= mask << 16;
5813                                 break;
5814                             case 2:
5815                                 if (shift < -31 || shift > 31) {
5816                                     mask = 0;
5817                                 } else {
5818                                     if (op == 4)
5819                                         mask = 0xffffffffu >> -shift;
5820                                     else
5821                                         mask = 0xffffffffu << shift;
5822                                 }
5823                                 break;
5824                             default:
5825                                 abort();
5826                             }
5827                             tmp2 = neon_load_reg(rd, pass);
5828                             tcg_gen_andi_i32(tmp, tmp, mask);
5829                             tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5830                             tcg_gen_or_i32(tmp, tmp, tmp2);
5831                             tcg_temp_free_i32(tmp2);
5832                         }
5833                         neon_store_reg(rd, pass, tmp);
5834                     }
5835                 } /* for pass */
5836             } else if (op < 10) {
5837                 /* Shift by immediate and narrow:
5838                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
5839                 int input_unsigned = (op == 8) ? !u : u;
5840                 if (rm & 1) {
5841                     return 1;
5842                 }
5843                 shift = shift - (1 << (size + 3));
5844                 size++;
5845                 if (size == 3) {
5846                     tmp64 = tcg_const_i64(shift);
5847                     neon_load_reg64(cpu_V0, rm);
5848                     neon_load_reg64(cpu_V1, rm + 1);
5849                     for (pass = 0; pass < 2; pass++) {
5850                         TCGv_i64 in;
5851                         if (pass == 0) {
5852                             in = cpu_V0;
5853                         } else {
5854                             in = cpu_V1;
5855                         }
5856                         if (q) {
5857                             if (input_unsigned) {
5858                                 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5859                             } else {
5860                                 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5861                             }
5862                         } else {
5863                             if (input_unsigned) {
5864                                 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5865                             } else {
5866                                 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5867                             }
5868                         }
5869                         tmp = tcg_temp_new_i32();
5870                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5871                         neon_store_reg(rd, pass, tmp);
5872                     } /* for pass */
5873                     tcg_temp_free_i64(tmp64);
5874                 } else {
5875                     if (size == 1) {
5876                         imm = (uint16_t)shift;
5877                         imm |= imm << 16;
5878                     } else {
5879                         /* size == 2 */
5880                         imm = (uint32_t)shift;
5881                     }
5882                     tmp2 = tcg_const_i32(imm);
5883                     tmp4 = neon_load_reg(rm + 1, 0);
5884                     tmp5 = neon_load_reg(rm + 1, 1);
5885                     for (pass = 0; pass < 2; pass++) {
5886                         if (pass == 0) {
5887                             tmp = neon_load_reg(rm, 0);
5888                         } else {
5889                             tmp = tmp4;
5890                         }
5891                         gen_neon_shift_narrow(size, tmp, tmp2, q,
5892                                               input_unsigned);
5893                         if (pass == 0) {
5894                             tmp3 = neon_load_reg(rm, 1);
5895                         } else {
5896                             tmp3 = tmp5;
5897                         }
5898                         gen_neon_shift_narrow(size, tmp3, tmp2, q,
5899                                               input_unsigned);
5900                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5901                         tcg_temp_free_i32(tmp);
5902                         tcg_temp_free_i32(tmp3);
5903                         tmp = tcg_temp_new_i32();
5904                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5905                         neon_store_reg(rd, pass, tmp);
5906                     } /* for pass */
5907                     tcg_temp_free_i32(tmp2);
5908                 }
5909             } else if (op == 10) {
5910                 /* VSHLL, VMOVL */
5911                 if (q || (rd & 1)) {
5912                     return 1;
5913                 }
5914                 tmp = neon_load_reg(rm, 0);
5915                 tmp2 = neon_load_reg(rm, 1);
5916                 for (pass = 0; pass < 2; pass++) {
5917                     if (pass == 1)
5918                         tmp = tmp2;
5919
5920                     gen_neon_widen(cpu_V0, tmp, size, u);
5921
5922                     if (shift != 0) {
5923                         /* The shift is less than the width of the source
5924                            type, so we can just shift the whole register.  */
5925                         tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5926                         /* Tidy up the widened, shifted result: we need
5927                          * to clear the potential overflow bits that
5928                          * result from the left bits of each narrow
5929                          * input appearing as right bits of its left
5930                          * neighbour narrow input.  */
5931                         if (size < 2 || !u) {
5932                             uint64_t imm64;
5933                             if (size == 0) {
5934                                 imm = (0xffu >> (8 - shift));
5935                                 imm |= imm << 16;
5936                             } else if (size == 1) {
5937                                 imm = 0xffff >> (16 - shift);
5938                             } else {
5939                                 /* size == 2 */
5940                                 imm = 0xffffffff >> (32 - shift);
5941                             }
5942                             if (size < 2) {
5943                                 imm64 = imm | (((uint64_t)imm) << 32);
5944                             } else {
5945                                 imm64 = imm;
5946                             }
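                                 /* Worked example: size == 0, shift == 3 gives
                                  * imm = 0x07 and imm64 = 0x0007000700070007;
                                  * the andi below clears the three low bits of
                                  * each 16-bit lane, where bits shifted out of
                                  * the neighbouring lane would land.  */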
5947                             tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5948                         }
5949                     }
5950                     neon_store_reg64(cpu_V0, rd + pass);
5951                 }
5952             } else if (op >= 14) {
5953                 /* VCVT fixed-point.  */
5954                 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5955                     return 1;
5956                 }
5957                 /* We have already masked out the must-be-1 top bit of imm6,
5958                  * hence this 32-shift where the ARM ARM has 64-imm6.
5959                  */
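                 /* e.g. imm6 = 0b111100 (60): masked shift = 28, so the
                  * number of fraction bits is 64 - 60 = 32 - 28 = 4.  */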
5960                 shift = 32 - shift;
5961                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5962                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5963                     if (!(op & 1)) {
5964                         if (u)
5965                             gen_vfp_ulto(0, shift, 1);
5966                         else
5967                             gen_vfp_slto(0, shift, 1);
5968                     } else {
5969                         if (u)
5970                             gen_vfp_toul(0, shift, 1);
5971                         else
5972                             gen_vfp_tosl(0, shift, 1);
5973                     }
5974                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
5975                 }
5976             } else {
5977                 return 1;
5978             }
5979         } else { /* (insn & 0x00380080) == 0 */
5980             int invert;
5981             if (q && (rd & 1)) {
5982                 return 1;
5983             }
5984
5985             op = (insn >> 8) & 0xf;
5986             /* One register and immediate.  */
5987             imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5988             invert = (insn & (1 << 5)) != 0;
5989             /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm == 0 is UNPREDICTABLE.
5990              * We choose to not special-case this and will behave as if a
5991              * valid constant encoding of 0 had been given.
5992              */
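             /* The cases below implement the AdvSIMD immediate expansion:
              * op selects which byte lane(s) of each 32-bit element take
              * imm8 and whether the other bytes are filled with 0x00 or
              * 0xff.  e.g. op == 12 with imm8 = 0xab yields 0x0000abff.  */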
5993             switch (op) {
5994             case 0: case 1:
5995                 /* no-op */
5996                 break;
5997             case 2: case 3:
5998                 imm <<= 8;
5999                 break;
6000             case 4: case 5:
6001                 imm <<= 16;
6002                 break;
6003             case 6: case 7:
6004                 imm <<= 24;
6005                 break;
6006             case 8: case 9:
6007                 imm |= imm << 16;
6008                 break;
6009             case 10: case 11:
6010                 imm = (imm << 8) | (imm << 24);
6011                 break;
6012             case 12:
6013                 imm = (imm << 8) | 0xff;
6014                 break;
6015             case 13:
6016                 imm = (imm << 16) | 0xffff;
6017                 break;
6018             case 14:
6019                 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6020                 if (invert)
6021                     imm = ~imm;
6022                 break;
6023             case 15:
6024                 if (invert) {
6025                     return 1;
6026                 }
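                 /* VFPExpandImm for a 32-bit float: abcdefgh becomes
                  * sign = a, exponent = NOT(b):bbbbb:cd, mantissa = efgh:Zeros(19).
                  * e.g. imm8 = 0x70 expands to 0x3f800000 (1.0f).  */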
6027                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6028                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
6029                 break;
6030             }
6031             if (invert)
6032                 imm = ~imm;
6033
6034             for (pass = 0; pass < (q ? 4 : 2); pass++) {
6035                 if (op & 1 && op < 12) {
6036                     tmp = neon_load_reg(rd, pass);
6037                     if (invert) {
6038                         /* The immediate value has already been inverted, so
6039                            BIC becomes AND.  */
6040                         tcg_gen_andi_i32(tmp, tmp, imm);
6041                     } else {
6042                         tcg_gen_ori_i32(tmp, tmp, imm);
6043                     }
6044                 } else {
6045                     /* VMOV, VMVN.  */
6046                     tmp = tcg_temp_new_i32();
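                     /* op 14 with invert set is the per-byte form: each of
                      * the eight immediate bits selects 0x00 or 0xff for one
                      * byte of the vector (bits 3:0 on even passes, bits 7:4
                      * on odd passes).  */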
6047                     if (op == 14 && invert) {
6048                         int n;
6049                         uint32_t val;
6050                         val = 0;
6051                         for (n = 0; n < 4; n++) {
6052                             if (imm & (1 << (n + (pass & 1) * 4)))
6053                                 val |= 0xff << (n * 8);
6054                         }
6055                         tcg_gen_movi_i32(tmp, val);
6056                     } else {
6057                         tcg_gen_movi_i32(tmp, imm);
6058                     }
6059                 }
6060                 neon_store_reg(rd, pass, tmp);
6061             }
6062         }
6063     } else { /* (insn & 0x00800010 == 0x00800000) */
6064         if (size != 3) {
6065             op = (insn >> 8) & 0xf;
6066             if ((insn & (1 << 6)) == 0) {
6067                 /* Three registers of different lengths.  */
6068                 int src1_wide;
6069                 int src2_wide;
6070                 int prewiden;
6071                 /* undefreq: bit 0 : UNDEF if size == 0
6072                  *           bit 1 : UNDEF if size == 1
6073                  *           bit 2 : UNDEF if size == 2
6074                  *           bit 3 : UNDEF if U == 1
6075                  * Note that [2:0] set implies 'always UNDEF'
6076                  */
6077                 int undefreq;
6078                 /* prewiden, src1_wide, src2_wide, undefreq */
6079                 static const int neon_3reg_wide[16][4] = {
6080                     {1, 0, 0, 0}, /* VADDL */
6081                     {1, 1, 0, 0}, /* VADDW */
6082                     {1, 0, 0, 0}, /* VSUBL */
6083                     {1, 1, 0, 0}, /* VSUBW */
6084                     {0, 1, 1, 0}, /* VADDHN */
6085                     {0, 0, 0, 0}, /* VABAL */
6086                     {0, 1, 1, 0}, /* VSUBHN */
6087                     {0, 0, 0, 0}, /* VABDL */
6088                     {0, 0, 0, 0}, /* VMLAL */
6089                     {0, 0, 0, 9}, /* VQDMLAL */
6090                     {0, 0, 0, 0}, /* VMLSL */
6091                     {0, 0, 0, 9}, /* VQDMLSL */
6092                     {0, 0, 0, 0}, /* Integer VMULL */
6093                     {0, 0, 0, 1}, /* VQDMULL */
6094                     {0, 0, 0, 0xa}, /* Polynomial VMULL */
6095                     {0, 0, 0, 7}, /* Reserved: always UNDEF */
6096                 };
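                 /* Example: the polynomial VMULL entry (0xa: UNDEF if
                  * size == 1 or U == 1) leaves VMULL.P8 (size 0) and
                  * VMULL.P64 (size 2) as the only valid encodings.  */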
6097
6098                 prewiden = neon_3reg_wide[op][0];
6099                 src1_wide = neon_3reg_wide[op][1];
6100                 src2_wide = neon_3reg_wide[op][2];
6101                 undefreq = neon_3reg_wide[op][3];
6102
6103                 if ((undefreq & (1 << size)) ||
6104                     ((undefreq & 8) && u)) {
6105                     return 1;
6106                 }
6107                 if ((src1_wide && (rn & 1)) ||
6108                     (src2_wide && (rm & 1)) ||
6109                     (!src2_wide && (rd & 1))) {
6110                     return 1;
6111                 }
6112
6113                 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
6114                  * outside the loop below as it only performs a single pass.
6115                  */
6116                 if (op == 14 && size == 2) {
6117                     TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
6118
6119                     if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
6120                         return 1;
6121                     }
6122                     tcg_rn = tcg_temp_new_i64();
6123                     tcg_rm = tcg_temp_new_i64();
6124                     tcg_rd = tcg_temp_new_i64();
6125                     neon_load_reg64(tcg_rn, rn);
6126                     neon_load_reg64(tcg_rm, rm);
6127                     gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
6128                     neon_store_reg64(tcg_rd, rd);
6129                     gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
6130                     neon_store_reg64(tcg_rd, rd + 1);
6131                     tcg_temp_free_i64(tcg_rn);
6132                     tcg_temp_free_i64(tcg_rm);
6133                     tcg_temp_free_i64(tcg_rd);
6134                     return 0;
6135                 }
6136
6137                 /* Avoid overlapping operands.  Wide source operands are
6138                    always aligned so will never overlap with wide
6139                    destinations in problematic ways.  */
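                 /* If rd aliases a narrow source, stash that source's second
                  * word in a scratch slot now: the pass 0 store to rd would
                  * otherwise clobber the pass 1 input.  */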
6140                 if (rd == rm && !src2_wide) {
6141                     tmp = neon_load_reg(rm, 1);
6142                     neon_store_scratch(2, tmp);
6143                 } else if (rd == rn && !src1_wide) {
6144                     tmp = neon_load_reg(rn, 1);
6145                     neon_store_scratch(2, tmp);
6146                 }
6147                 TCGV_UNUSED_I32(tmp3);
6148                 for (pass = 0; pass < 2; pass++) {
6149                     if (src1_wide) {
6150                         neon_load_reg64(cpu_V0, rn + pass);
6151                         TCGV_UNUSED_I32(tmp);
6152                     } else {
6153                         if (pass == 1 && rd == rn) {
6154                             tmp = neon_load_scratch(2);
6155                         } else {
6156                             tmp = neon_load_reg(rn, pass);
6157                         }
6158                         if (prewiden) {
6159                             gen_neon_widen(cpu_V0, tmp, size, u);
6160                         }
6161                     }
6162                     if (src2_wide) {
6163                         neon_load_reg64(cpu_V1, rm + pass);
6164                         TCGV_UNUSED_I32(tmp2);
6165                     } else {
6166                         if (pass == 1 && rd == rm) {
6167                             tmp2 = neon_load_scratch(2);
6168                         } else {
6169                             tmp2 = neon_load_reg(rm, pass);
6170                         }
6171                         if (prewiden) {
6172                             gen_neon_widen(cpu_V1, tmp2, size, u);
6173                         }
6174                     }
6175                     switch (op) {
6176                     case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6177                         gen_neon_addl(size);
6178                         break;
6179                     case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6180                         gen_neon_subl(size);
6181                         break;
6182                     case 5: case 7: /* VABAL, VABDL */
6183                         switch ((size << 1) | u) {
6184                         case 0:
6185                             gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6186                             break;
6187                         case 1:
6188                             gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6189                             break;
6190                         case 2:
6191                             gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6192                             break;
6193                         case 3:
6194                             gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6195                             break;
6196                         case 4:
6197                             gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6198                             break;
6199                         case 5:
6200                             gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6201                             break;
6202                         default: abort();
6203                         }
6204                         tcg_temp_free_i32(tmp2);
6205                         tcg_temp_free_i32(tmp);
6206                         break;
6207                     case 8: case 9: case 10: case 11: case 12: case 13:
6208                         /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6209                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6210                         break;
6211                     case 14: /* Polynomial VMULL */
6212                         gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
6213                         tcg_temp_free_i32(tmp2);
6214                         tcg_temp_free_i32(tmp);
6215                         break;
6216                     default: /* 15 is RESERVED: caught earlier  */
6217                         abort();
6218                     }
6219                     if (op == 13) {
6220                         /* VQDMULL */
6221                         gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6222                         neon_store_reg64(cpu_V0, rd + pass);
6223                     } else if (op == 5 || (op >= 8 && op <= 11)) {
6224                         /* Accumulate.  */
6225                         neon_load_reg64(cpu_V1, rd + pass);
6226                         switch (op) {
6227                         case 10: /* VMLSL */
6228                             gen_neon_negl(cpu_V0, size);
6229                             /* Fall through */
6230                         case 5: case 8: /* VABAL, VMLAL */
6231                             gen_neon_addl(size);
6232                             break;
6233                         case 9: case 11: /* VQDMLAL, VQDMLSL */
6234                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6235                             if (op == 11) {
6236                                 gen_neon_negl(cpu_V0, size);
6237                             }
6238                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6239                             break;
6240                         default:
6241                             abort();
6242                         }
6243                         neon_store_reg64(cpu_V0, rd + pass);
6244                     } else if (op == 4 || op == 6) {
6245                         /* Narrowing operation.  */
6246                         tmp = tcg_temp_new_i32();
6247                         if (!u) {
6248                             switch (size) {
6249                             case 0:
6250                                 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6251                                 break;
6252                             case 1:
6253                                 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6254                                 break;
6255                             case 2:
6256                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6257                                 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
6258                                 break;
6259                             default: abort();
6260                             }
6261                         } else {
6262                             switch (size) {
6263                             case 0:
6264                                 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6265                                 break;
6266                             case 1:
6267                                 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6268                                 break;
6269                             case 2:
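                                     /* Bias by half the discarded low word
                                      * (bit 31) so the truncation to the high
                                      * half rounds to nearest.  */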
6270                                 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6271                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6272                                 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
6273                                 break;
6274                             default: abort();
6275                             }
6276                         }
6277                         if (pass == 0) {
6278                             tmp3 = tmp;
6279                         } else {
6280                             neon_store_reg(rd, 0, tmp3);
6281                             neon_store_reg(rd, 1, tmp);
6282                         }
6283                     } else {
6284                         /* Write back the result.  */
6285                         neon_store_reg64(cpu_V0, rd + pass);
6286                     }
6287                 }
6288             } else {
6289                 /* Two registers and a scalar. NB that for ops of this form
6290                  * the ARM ARM labels bit 24 as Q, but it is in our variable
6291                  * 'u', not 'q'.
6292                  */
6293                 if (size == 0) {
6294                     return 1;
6295                 }
6296                 switch (op) {
6297                 case 1: /* Float VMLA scalar */
6298                 case 5: /* Floating point VMLS scalar */
6299                 case 9: /* Floating point VMUL scalar */
6300                     if (size == 1) {
6301                         return 1;
6302                     }
6303                     /* fall through */
6304                 case 0: /* Integer VMLA scalar */
6305                 case 4: /* Integer VMLS scalar */
6306                 case 8: /* Integer VMUL scalar */
6307                 case 12: /* VQDMULH scalar */
6308                 case 13: /* VQRDMULH scalar */
6309                     if (u && ((rd | rn) & 1)) {
6310                         return 1;
6311                     }
6312                     tmp = neon_get_scalar(size, rm);
6313                     neon_store_scratch(0, tmp);
6314                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
6315                         tmp = neon_load_scratch(0);
6316                         tmp2 = neon_load_reg(rn, pass);
6317                         if (op == 12) {
6318                             if (size == 1) {
6319                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6320                             } else {
6321                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6322                             }
6323                         } else if (op == 13) {
6324                             if (size == 1) {
6325                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6326                             } else {
6327                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6328                             }
6329                         } else if (op & 1) {
6330                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6331                             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6332                             tcg_temp_free_ptr(fpstatus);
6333                         } else {
6334                             switch (size) {
6335                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6336                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6337                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6338                             default: abort();
6339                             }
6340                         }
6341                         tcg_temp_free_i32(tmp2);
6342                         if (op < 8) {
6343                             /* Accumulate.  */
6344                             tmp2 = neon_load_reg(rd, pass);
6345                             switch (op) {
6346                             case 0:
6347                                 gen_neon_add(size, tmp, tmp2);
6348                                 break;
6349                             case 1:
6350                             {
6351                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6352                                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6353                                 tcg_temp_free_ptr(fpstatus);
6354                                 break;
6355                             }
6356                             case 4:
6357                                 gen_neon_rsb(size, tmp, tmp2);
6358                                 break;
6359                             case 5:
6360                             {
6361                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6362                                 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6363                                 tcg_temp_free_ptr(fpstatus);
6364                                 break;
6365                             }
6366                             default:
6367                                 abort();
6368                             }
6369                             tcg_temp_free_i32(tmp2);
6370                         }
6371                         neon_store_reg(rd, pass, tmp);
6372                     }
6373                     break;
6374                 case 3: /* VQDMLAL scalar */
6375                 case 7: /* VQDMLSL scalar */
6376                 case 11: /* VQDMULL scalar */
6377                     if (u == 1) {
6378                         return 1;
6379                     }
6380                     /* fall through */
6381                 case 2: /* VMLAL scalar */
6382                 case 6: /* VMLSL scalar */
6383                 case 10: /* VMULL scalar */
6384                     if (rd & 1) {
6385                         return 1;
6386                     }
6387                     tmp2 = neon_get_scalar(size, rm);
6388                     /* We need a copy of tmp2 because gen_neon_mull
6389                      * frees it during pass 0.  */
6390                     tmp4 = tcg_temp_new_i32();
6391                     tcg_gen_mov_i32(tmp4, tmp2);
6392                     tmp3 = neon_load_reg(rn, 1);
6393
6394                     for (pass = 0; pass < 2; pass++) {
6395                         if (pass == 0) {
6396                             tmp = neon_load_reg(rn, 0);
6397                         } else {
6398                             tmp = tmp3;
6399                             tmp2 = tmp4;
6400                         }
6401                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6402                         if (op != 11) {
6403                             neon_load_reg64(cpu_V1, rd + pass);
6404                         }
6405                         switch (op) {
6406                         case 6:
6407                             gen_neon_negl(cpu_V0, size);
6408                             /* Fall through */
6409                         case 2:
6410                             gen_neon_addl(size);
6411                             break;
6412                         case 3: case 7:
6413                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6414                             if (op == 7) {
6415                                 gen_neon_negl(cpu_V0, size);
6416                             }
6417                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6418                             break;
6419                         case 10:
6420                             /* no-op */
6421                             break;
6422                         case 11:
6423                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6424                             break;
6425                         default:
6426                             abort();
6427                         }
6428                         neon_store_reg64(cpu_V0, rd + pass);
6429                     }
6430
6431
6432                     break;
6433                 default: /* 14 and 15 are RESERVED */
6434                     return 1;
6435                 }
6436             }
6437         } else { /* size == 3 */
6438             if (!u) {
6439                 /* Extract.  */
6440                 imm = (insn >> 8) & 0xf;
6441
6442                 if (imm > 7 && !q)
6443                     return 1;
6444
6445                 if (q && ((rd | rn | rm) & 1)) {
6446                     return 1;
6447                 }
6448
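                 /* VEXT: Vd = bytes [imm .. imm+7] ([imm .. imm+15] when
                  * quad) of the concatenation Vm:Vn, with Vn supplying the
                  * least significant bytes; imm == 0 and imm == 8 need no
                  * shifting, only whole-register moves.  */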
6449                 if (imm == 0) {
6450                     neon_load_reg64(cpu_V0, rn);
6451                     if (q) {
6452                         neon_load_reg64(cpu_V1, rn + 1);
6453                     }
6454                 } else if (imm == 8) {
6455                     neon_load_reg64(cpu_V0, rn + 1);
6456                     if (q) {
6457                         neon_load_reg64(cpu_V1, rm);
6458                     }
6459                 } else if (q) {
6460                     tmp64 = tcg_temp_new_i64();
6461                     if (imm < 8) {
6462                         neon_load_reg64(cpu_V0, rn);
6463                         neon_load_reg64(tmp64, rn + 1);
6464                     } else {
6465                         neon_load_reg64(cpu_V0, rn + 1);
6466                         neon_load_reg64(tmp64, rm);
6467                     }
6468                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6469                     tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6470                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6471                     if (imm < 8) {
6472                         neon_load_reg64(cpu_V1, rm);
6473                     } else {
6474                         neon_load_reg64(cpu_V1, rm + 1);
6475                         imm -= 8;
6476                     }
6477                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6478                     tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6479                     tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6480                     tcg_temp_free_i64(tmp64);
6481                 } else {
6482                     /* Single D register: Vd = ((Vm:Vn) >> (imm * 8))[63:0].  */
6483                     neon_load_reg64(cpu_V0, rn);
6484                     tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6485                     neon_load_reg64(cpu_V1, rm);
6486                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6487                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6488                 }
6489                 neon_store_reg64(cpu_V0, rd);
6490                 if (q) {
6491                     neon_store_reg64(cpu_V1, rd + 1);
6492                 }
6493             } else if ((insn & (1 << 11)) == 0) {
6494                 /* Two register misc.  */
6495                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6496                 size = (insn >> 18) & 3;
6497                 /* UNDEF for unknown op values and bad op-size combinations */
6498                 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6499                     return 1;
6500                 }
6501                 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6502                     q && ((rm | rd) & 1)) {
6503                     return 1;
6504                 }
6505                 switch (op) {
6506                 case NEON_2RM_VREV64:
6507                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
6508                         tmp = neon_load_reg(rm, pass * 2);
6509                         tmp2 = neon_load_reg(rm, pass * 2 + 1);
6510                         switch (size) {
6511                         case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6512                         case 1: gen_swap_half(tmp); break;
6513                         case 2: /* no-op */ break;
6514                         default: abort();
6515                         }
6516                         neon_store_reg(rd, pass * 2 + 1, tmp);
6517                         if (size == 2) {
6518                             neon_store_reg(rd, pass * 2, tmp2);
6519                         } else {
6520                             switch (size) {
6521                             case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6522                             case 1: gen_swap_half(tmp2); break;
6523                             default: abort();
6524                             }
6525                             neon_store_reg(rd, pass * 2, tmp2);
6526                         }
6527                     }
6528                     break;
6529                 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6530                 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6531                     for (pass = 0; pass < q + 1; pass++) {
6532                         tmp = neon_load_reg(rm, pass * 2);
6533                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
6534                         tmp = neon_load_reg(rm, pass * 2 + 1);
6535                         gen_neon_widen(cpu_V1, tmp, size, op & 1);
6536                         switch (size) {
6537                         case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6538                         case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6539                         case 2: tcg_gen_add_i64(CPU_V001); break;
6540                         default: abort();
6541                         }
6542                         if (op >= NEON_2RM_VPADAL) {
6543                             /* Accumulate.  */
6544                             neon_load_reg64(cpu_V1, rd + pass);
6545                             gen_neon_addl(size);
6546                         }
6547                         neon_store_reg64(cpu_V0, rd + pass);
6548                     }
6549                     break;
6550                 case NEON_2RM_VTRN:
6551                     if (size == 2) {
6552                         int n;
6553                         for (n = 0; n < (q ? 4 : 2); n += 2) {
6554                             tmp = neon_load_reg(rm, n);
6555                             tmp2 = neon_load_reg(rd, n + 1);
6556                             neon_store_reg(rm, n, tmp2);
6557                             neon_store_reg(rd, n + 1, tmp);
6558                         }
6559                     } else {
6560                         goto elementwise;
6561                     }
6562                     break;
6563                 case NEON_2RM_VUZP:
6564                     if (gen_neon_unzip(rd, rm, size, q)) {
6565                         return 1;
6566                     }
6567                     break;
6568                 case NEON_2RM_VZIP:
6569                     if (gen_neon_zip(rd, rm, size, q)) {
6570                         return 1;
6571                     }
6572                     break;
6573                 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6574                     /* also VQMOVUN; op field and mnemonics don't line up */
6575                     if (rm & 1) {
6576                         return 1;
6577                     }
6578                     TCGV_UNUSED_I32(tmp2);
6579                     for (pass = 0; pass < 2; pass++) {
6580                         neon_load_reg64(cpu_V0, rm + pass);
6581                         tmp = tcg_temp_new_i32();
6582                         gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6583                                            tmp, cpu_V0);
6584                         if (pass == 0) {
6585                             tmp2 = tmp;
6586                         } else {
6587                             neon_store_reg(rd, 0, tmp2);
6588                             neon_store_reg(rd, 1, tmp);
6589                         }
6590                     }
6591                     break;
6592                 case NEON_2RM_VSHLL:
6593                     if (q || (rd & 1)) {
6594                         return 1;
6595                     }
6596                     tmp = neon_load_reg(rm, 0);
6597                     tmp2 = neon_load_reg(rm, 1);
6598                     for (pass = 0; pass < 2; pass++) {
6599                         if (pass == 1)
6600                             tmp = tmp2;
6601                         gen_neon_widen(cpu_V0, tmp, size, 1);
6602                         tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6603                         neon_store_reg64(cpu_V0, rd + pass);
6604                     }
6605                     break;
6606                 case NEON_2RM_VCVT_F16_F32:
6607                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6608                         q || (rm & 1)) {
6609                         return 1;
6610                     }
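                     /* Convert the four f32 elements of Qm to f16 and pack
                      * each pair into one 32-bit word of Dd, low half
                      * first.  */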
6611                     tmp = tcg_temp_new_i32();
6612                     tmp2 = tcg_temp_new_i32();
6613                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
6614                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6615                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
6616                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6617                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6618                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6619                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
6620                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6621                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
6622                     neon_store_reg(rd, 0, tmp2);
6623                     tmp2 = tcg_temp_new_i32();
6624                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6625                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6626                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6627                     neon_store_reg(rd, 1, tmp2);
6628                     tcg_temp_free_i32(tmp);
6629                     break;
6630                 case NEON_2RM_VCVT_F32_F16:
6631                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6632                         q || (rd & 1)) {
6633                         return 1;
6634                     }
6635                     tmp3 = tcg_temp_new_i32();
6636                     tmp = neon_load_reg(rm, 0);
6637                     tmp2 = neon_load_reg(rm, 1);
6638                     tcg_gen_ext16u_i32(tmp3, tmp);
6639                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6640                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
6641                     tcg_gen_shri_i32(tmp3, tmp, 16);
6642                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6643                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
6644                     tcg_temp_free_i32(tmp);
6645                     tcg_gen_ext16u_i32(tmp3, tmp2);
6646                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6647                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
6648                     tcg_gen_shri_i32(tmp3, tmp2, 16);
6649                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6650                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
6651                     tcg_temp_free_i32(tmp2);
6652                     tcg_temp_free_i32(tmp3);
6653                     break;
6654                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6655                     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
6656                         || ((rm | rd) & 1)) {
6657                         return 1;
6658                     }
6659                     tmp = tcg_const_i32(rd);
6660                     tmp2 = tcg_const_i32(rm);
6661
6662                     /* Bit 6 is the lowest opcode bit; it distinguishes between
6663                      * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6664                      */
6665                     tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6666
6667                     if (op == NEON_2RM_AESE) {
6668                         gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
6669                     } else {
6670                         gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
6671                     }
6672                     tcg_temp_free_i32(tmp);
6673                     tcg_temp_free_i32(tmp2);
6674                     tcg_temp_free_i32(tmp3);
6675                     break;
6676                 case NEON_2RM_SHA1H:
6677                     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
6678                         || ((rm | rd) & 1)) {
6679                         return 1;
6680                     }
6681                     tmp = tcg_const_i32(rd);
6682                     tmp2 = tcg_const_i32(rm);
6683
6684                     gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
6685
6686                     tcg_temp_free_i32(tmp);
6687                     tcg_temp_free_i32(tmp2);
6688                     break;
6689                 case NEON_2RM_SHA1SU1:
6690                     if ((rm | rd) & 1) {
6691                         return 1;
6692                     }
6693                     /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6694                     if (q) {
6695                         if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
6696                             return 1;
6697                         }
6698                     } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
6699                         return 1;
6700                     }
6701                     tmp = tcg_const_i32(rd);
6702                     tmp2 = tcg_const_i32(rm);
6703                     if (q) {
6704                         gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
6705                     } else {
6706                         gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
6707                     }
6708                     tcg_temp_free_i32(tmp);
6709                     tcg_temp_free_i32(tmp2);
6710                     break;
6711                 default:
6712                 elementwise:
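                     /* The remaining ops work on one 32-bit element per
                      * pass: float ops go through cpu_F0s, integer ops
                      * through a 32-bit temporary, and results are stored
                      * back symmetrically below.  */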
6713                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
6714                         if (neon_2rm_is_float_op(op)) {
6715                             tcg_gen_ld_f32(cpu_F0s, cpu_env,
6716                                            neon_reg_offset(rm, pass));
6717                             TCGV_UNUSED_I32(tmp);
6718                         } else {
6719                             tmp = neon_load_reg(rm, pass);
6720                         }
6721                         switch (op) {
6722                         case NEON_2RM_VREV32:
6723                             switch (size) {
6724                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6725                             case 1: gen_swap_half(tmp); break;
6726                             default: abort();
6727                             }
6728                             break;
6729                         case NEON_2RM_VREV16:
6730                             gen_rev16(tmp);
6731                             break;
6732                         case NEON_2RM_VCLS:
6733                             switch (size) {
6734                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6735                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6736                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6737                             default: abort();
6738                             }
6739                             break;
6740                         case NEON_2RM_VCLZ:
6741                             switch (size) {
6742                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6743                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6744                             case 2: gen_helper_clz(tmp, tmp); break;
6745                             default: abort();
6746                             }
6747                             break;
6748                         case NEON_2RM_VCNT:
6749                             gen_helper_neon_cnt_u8(tmp, tmp);
6750                             break;
6751                         case NEON_2RM_VMVN:
6752                             tcg_gen_not_i32(tmp, tmp);
6753                             break;
6754                         case NEON_2RM_VQABS:
6755                             switch (size) {
6756                             case 0:
6757                                 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6758                                 break;
6759                             case 1:
6760                                 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6761                                 break;
6762                             case 2:
6763                                 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6764                                 break;
6765                             default: abort();
6766                             }
6767                             break;
6768                         case NEON_2RM_VQNEG:
6769                             switch (size) {
6770                             case 0:
6771                                 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6772                                 break;
6773                             case 1:
6774                                 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6775                                 break;
6776                             case 2:
6777                                 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6778                                 break;
6779                             default: abort();
6780                             }
6781                             break;
6782                         case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6783                             tmp2 = tcg_const_i32(0);
6784                             switch(size) {
6785                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6786                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6787                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6788                             default: abort();
6789                             }
6790                             tcg_temp_free_i32(tmp2);
6791                             if (op == NEON_2RM_VCLE0) {
6792                                 tcg_gen_not_i32(tmp, tmp);
6793                             }
6794                             break;
6795                         case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6796                             tmp2 = tcg_const_i32(0);
6797                             switch(size) {
6798                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6799                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6800                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6801                             default: abort();
6802                             }
6803                             tcg_temp_free_i32(tmp2);
6804                             if (op == NEON_2RM_VCLT0) {
6805                                 tcg_gen_not_i32(tmp, tmp);
6806                             }
6807                             break;
6808                         case NEON_2RM_VCEQ0:
6809                             tmp2 = tcg_const_i32(0);
6810                             switch(size) {
6811                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6812                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6813                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6814                             default: abort();
6815                             }
6816                             tcg_temp_free_i32(tmp2);
6817                             break;
6818                         case NEON_2RM_VABS:
6819                             switch(size) {
6820                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
6821                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
6822                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
6823                             default: abort();
6824                             }
6825                             break;
6826                         case NEON_2RM_VNEG:
6827                             tmp2 = tcg_const_i32(0);
6828                             gen_neon_rsb(size, tmp, tmp2);
6829                             tcg_temp_free_i32(tmp2);
6830                             break;
6831                         case NEON_2RM_VCGT0_F:
6832                         {
6833                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6834                             tmp2 = tcg_const_i32(0);
6835                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6836                             tcg_temp_free_i32(tmp2);
6837                             tcg_temp_free_ptr(fpstatus);
6838                             break;
6839                         }
6840                         case NEON_2RM_VCGE0_F:
6841                         {
6842                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6843                             tmp2 = tcg_const_i32(0);
6844                             gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6845                             tcg_temp_free_i32(tmp2);
6846                             tcg_temp_free_ptr(fpstatus);
6847                             break;
6848                         }
6849                         case NEON_2RM_VCEQ0_F:
6850                         {
6851                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6852                             tmp2 = tcg_const_i32(0);
6853                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6854                             tcg_temp_free_i32(tmp2);
6855                             tcg_temp_free_ptr(fpstatus);
6856                             break;
6857                         }
6858                         case NEON_2RM_VCLE0_F:
6859                         {
6860                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6861                             tmp2 = tcg_const_i32(0);
6862                             gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6863                             tcg_temp_free_i32(tmp2);
6864                             tcg_temp_free_ptr(fpstatus);
6865                             break;
6866                         }
6867                         case NEON_2RM_VCLT0_F:
6868                         {
6869                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6870                             tmp2 = tcg_const_i32(0);
6871                             gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6872                             tcg_temp_free_i32(tmp2);
6873                             tcg_temp_free_ptr(fpstatus);
6874                             break;
6875                         }
6876                         case NEON_2RM_VABS_F:
6877                             gen_vfp_abs(0);
6878                             break;
6879                         case NEON_2RM_VNEG_F:
6880                             gen_vfp_neg(0);
6881                             break;
6882                         case NEON_2RM_VSWP:
6883                             tmp2 = neon_load_reg(rd, pass);
6884                             neon_store_reg(rm, pass, tmp2);
6885                             break;
6886                         case NEON_2RM_VTRN:
6887                             tmp2 = neon_load_reg(rd, pass);
6888                             switch (size) {
6889                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
6890                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
6891                             default: abort();
6892                             }
6893                             neon_store_reg(rm, pass, tmp2);
6894                             break;
6895                         case NEON_2RM_VRINTN:
6896                         case NEON_2RM_VRINTA:
6897                         case NEON_2RM_VRINTM:
6898                         case NEON_2RM_VRINTP:
6899                         case NEON_2RM_VRINTZ:
6900                         {
6901                             TCGv_i32 tcg_rmode;
6902                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6903                             int rmode;
6904
6905                             if (op == NEON_2RM_VRINTZ) {
6906                                 rmode = FPROUNDING_ZERO;
6907                             } else {
6908                                 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6909                             }
6910
6911                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6912                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6913                                                       cpu_env);
6914                             gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
6915                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6916                                                       cpu_env);
6917                             tcg_temp_free_ptr(fpstatus);
6918                             tcg_temp_free_i32(tcg_rmode);
6919                             break;
6920                         }
6921                         case NEON_2RM_VRINTX:
6922                         {
6923                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6924                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
6925                             tcg_temp_free_ptr(fpstatus);
6926                             break;
6927                         }
6928                         case NEON_2RM_VCVTAU:
6929                         case NEON_2RM_VCVTAS:
6930                         case NEON_2RM_VCVTNU:
6931                         case NEON_2RM_VCVTNS:
6932                         case NEON_2RM_VCVTPU:
6933                         case NEON_2RM_VCVTPS:
6934                         case NEON_2RM_VCVTMU:
6935                         case NEON_2RM_VCVTMS:
6936                         {
6937                             bool is_signed = !extract32(insn, 7, 1);
6938                             TCGv_ptr fpst = get_fpstatus_ptr(1);
6939                             TCGv_i32 tcg_rmode, tcg_shift;
6940                             int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6941
6942                             tcg_shift = tcg_const_i32(0);
6943                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6944                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6945                                                       cpu_env);
6946
6947                             if (is_signed) {
6948                                 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
6949                                                      tcg_shift, fpst);
6950                             } else {
6951                                 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
6952                                                      tcg_shift, fpst);
6953                             }
6954
6955                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6956                                                       cpu_env);
6957                             tcg_temp_free_i32(tcg_rmode);
6958                             tcg_temp_free_i32(tcg_shift);
6959                             tcg_temp_free_ptr(fpst);
6960                             break;
6961                         }
6962                         case NEON_2RM_VRECPE:
6963                         {
6964                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6965                             gen_helper_recpe_u32(tmp, tmp, fpstatus);
6966                             tcg_temp_free_ptr(fpstatus);
6967                             break;
6968                         }
6969                         case NEON_2RM_VRSQRTE:
6970                         {
6971                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6972                             gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6973                             tcg_temp_free_ptr(fpstatus);
6974                             break;
6975                         }
6976                         case NEON_2RM_VRECPE_F:
6977                         {
6978                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6979                             gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
6980                             tcg_temp_free_ptr(fpstatus);
6981                             break;
6982                         }
6983                         case NEON_2RM_VRSQRTE_F:
6984                         {
6985                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6986                             gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
6987                             tcg_temp_free_ptr(fpstatus);
6988                             break;
6989                         }
6990                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6991                             gen_vfp_sito(0, 1);
6992                             break;
6993                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6994                             gen_vfp_uito(0, 1);
6995                             break;
6996                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6997                             gen_vfp_tosiz(0, 1);
6998                             break;
6999                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
7000                             gen_vfp_touiz(0, 1);
7001                             break;
7002                         default:
7003                             /* Reserved op values were caught by the
7004                              * neon_2rm_sizes[] check earlier.
7005                              */
7006                             abort();
7007                         }
7008                         if (neon_2rm_is_float_op(op)) {
7009                             tcg_gen_st_f32(cpu_F0s, cpu_env,
7010                                            neon_reg_offset(rd, pass));
7011                         } else {
7012                             neon_store_reg(rd, pass, tmp);
7013                         }
7014                     }
7015                     break;
7016                 }
7017             } else if ((insn & (1 << 10)) == 0) {
7018                 /* VTBL, VTBX.  */
7019                 int n = ((insn >> 8) & 3) + 1;
7020                 if ((rn + n) > 32) {
7021                     /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7022                      * helper function running off the end of the register file.
7023                      */
7024                     return 1;
7025                 }
7026                 n <<= 3;
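                 /* n is now the table length in bytes (8 per list
                  * register, so 8, 16, 24 or 32).  */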
7027                 if (insn & (1 << 6)) {
7028                     tmp = neon_load_reg(rd, 0);
7029                 } else {
7030                     tmp = tcg_temp_new_i32();
7031                     tcg_gen_movi_i32(tmp, 0);
7032                 }
7033                 tmp2 = neon_load_reg(rm, 0);
7034                 tmp4 = tcg_const_i32(rn);
7035                 tmp5 = tcg_const_i32(n);
7036                 gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
7037                 tcg_temp_free_i32(tmp);
7038                 if (insn & (1 << 6)) {
7039                     tmp = neon_load_reg(rd, 1);
7040                 } else {
7041                     tmp = tcg_temp_new_i32();
7042                     tcg_gen_movi_i32(tmp, 0);
7043                 }
7044                 tmp3 = neon_load_reg(rm, 1);
7045                 gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
7046                 tcg_temp_free_i32(tmp5);
7047                 tcg_temp_free_i32(tmp4);
7048                 neon_store_reg(rd, 0, tmp2);
7049                 neon_store_reg(rd, 1, tmp3);
7050                 tcg_temp_free_i32(tmp);
7051             } else if ((insn & 0x380) == 0) {
7052                 /* VDUP */
7053                 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7054                     return 1;
7055                 }
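                 /* imm4 (insn[19:16]) encodes the scalar size and index:
                  * bit 16 set selects a byte, else bit 17 set a halfword,
                  * else a word; the higher bits pick the element, with
                  * bit 19 choosing the source word loaded below.  */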
7056                 if (insn & (1 << 19)) {
7057                     tmp = neon_load_reg(rm, 1);
7058                 } else {
7059                     tmp = neon_load_reg(rm, 0);
7060                 }
7061                 if (insn & (1 << 16)) {
7062                     gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
7063                 } else if (insn & (1 << 17)) {
7064                     if ((insn >> 18) & 1)
7065                         gen_neon_dup_high16(tmp);
7066                     else
7067                         gen_neon_dup_low16(tmp);
7068                 }
7069                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7070                     tmp2 = tcg_temp_new_i32();
7071                     tcg_gen_mov_i32(tmp2, tmp);
7072                     neon_store_reg(rd, pass, tmp2);
7073                 }
7074                 tcg_temp_free_i32(tmp);
7075             } else {
7076                 return 1;
7077             }
7078         }
7079     }
7080     return 0;
7081 }
7082
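     /* Decode a system coprocessor register access (MCR, MRC, MCRR, MRRC).
      * Returns 0 on success; a nonzero return tells the caller to UNDEF
      * (CDP and any unknown or disallowed access end up there).
      */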
7083 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7084 {
7085     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7086     const ARMCPRegInfo *ri;
7087
7088     cpnum = (insn >> 8) & 0xf;
7089
7090     /* First check for coprocessor space used for XScale/iwMMXt insns */
7091     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7092         if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7093             return 1;
7094         }
7095         if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7096             return disas_iwmmxt_insn(s, insn);
7097         } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7098             return disas_dsp_insn(s, insn);
7099         }
7100         return 1;
7101     }
7102
7103     /* Otherwise treat as a generic register access */
7104     is64 = (insn & (1 << 25)) == 0;
7105     if (!is64 && ((insn & (1 << 4)) == 0)) {
7106         /* cdp */
7107         return 1;
7108     }
7109
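     /* Decode the register-selection fields.  The 64-bit transfer form
      * (MCRR/MRRC) keeps opc1 in bits [7:4] and the second core register
      * in bits [19:16]; the 32-bit form (MCR/MRC) uses crn/opc1/opc2.
      */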
7110     crm = insn & 0xf;
7111     if (is64) {
7112         crn = 0;
7113         opc1 = (insn >> 4) & 0xf;
7114         opc2 = 0;
7115         rt2 = (insn >> 16) & 0xf;
7116     } else {
7117         crn = (insn >> 16) & 0xf;
7118         opc1 = (insn >> 21) & 7;
7119         opc2 = (insn >> 5) & 7;
7120         rt2 = 0;
7121     }
7122     isread = (insn >> 20) & 1;
7123     rt = (insn >> 12) & 0xf;
7124
7125     ri = get_arm_cp_reginfo(s->cp_regs,
7126             ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7127     if (ri) {
7128         /* Check access permissions */
7129         if (!cp_access_ok(s->current_el, ri, isread)) {
7130             return 1;
7131         }
7132
7133         if (ri->accessfn ||
7134             (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7135             /* Emit code to perform further access permissions checks at
7136              * runtime; this may result in an exception.
7137              * Note that on XScale all cp0..cp13 registers do an access check
7138              * call in order to handle c15_cpar.
7139              */
7140             TCGv_ptr tmpptr;
7141             TCGv_i32 tcg_syn;
7142             uint32_t syndrome;
7143
7144             /* Note that since we are an implementation which takes an
7145              * exception on a trapped conditional instruction only if the
7146              * instruction passes its condition code check, we can take
7147              * advantage of the clause in the ARM ARM that allows us to set
7148              * the COND field in the instruction to 0xE in all cases.
7149              * We could fish the actual condition out of the insn (ARM)
7150              * or the condexec bits (Thumb) but it isn't necessary.
7151              */
7152             switch (cpnum) {
7153             case 14:
7154                 if (is64) {
7155                     syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7156                                                  isread, s->thumb);
7157                 } else {
7158                     syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7159                                                 rt, isread, s->thumb);
7160                 }
7161                 break;
7162             case 15:
7163                 if (is64) {
7164                     syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7165                                                  isread, s->thumb);
7166                 } else {
7167                     syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7168                                                 rt, isread, s->thumb);
7169                 }
7170                 break;
7171             default:
7172                 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7173                  * so this can only happen if this is an ARMv7 or earlier CPU,
7174                  * in which case the syndrome information won't actually be
7175                  * guest visible.
7176                  */
7177                 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7178                 syndrome = syn_uncategorized();
7179                 break;
7180             }
7181
7182             gen_set_pc_im(s, s->pc - 4);
7183             tmpptr = tcg_const_ptr(ri);
7184             tcg_syn = tcg_const_i32(syndrome);
7185             gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
7186             tcg_temp_free_ptr(tmpptr);
7187             tcg_temp_free_i32(tcg_syn);
7188         }
7189
7190         /* Handle special cases first */
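             /* The type mask clears every flag bit except ARM_CP_SPECIAL,
              * so the special-value types (ARM_CP_NOP, ARM_CP_WFI) match
              * no matter which other flags the definition carries.
              */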
7191         switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7192         case ARM_CP_NOP:
7193             return 0;
7194         case ARM_CP_WFI:
7195             if (isread) {
7196                 return 1;
7197             }
7198             gen_set_pc_im(s, s->pc);
7199             s->is_jmp = DISAS_WFI;
7200             return 0;
7201         default:
7202             break;
7203         }
7204
7205         if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7206             gen_io_start();
7207         }
7208
7209         if (isread) {
7210             /* Read */
7211             if (is64) {
7212                 TCGv_i64 tmp64;
7213                 TCGv_i32 tmp;
7214                 if (ri->type & ARM_CP_CONST) {
7215                     tmp64 = tcg_const_i64(ri->resetvalue);
7216                 } else if (ri->readfn) {
7217                     TCGv_ptr tmpptr;
7218                     tmp64 = tcg_temp_new_i64();
7219                     tmpptr = tcg_const_ptr(ri);
7220                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7221                     tcg_temp_free_ptr(tmpptr);
7222                 } else {
7223                     tmp64 = tcg_temp_new_i64();
7224                     tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7225                 }
7226                 tmp = tcg_temp_new_i32();
7227                 tcg_gen_trunc_i64_i32(tmp, tmp64);
7228                 store_reg(s, rt, tmp);
7229                 tcg_gen_shri_i64(tmp64, tmp64, 32);
7230                 tmp = tcg_temp_new_i32();
7231                 tcg_gen_trunc_i64_i32(tmp, tmp64);
7232                 tcg_temp_free_i64(tmp64);
7233                 store_reg(s, rt2, tmp);
7234             } else {
7235                 TCGv_i32 tmp;
7236                 if (ri->type & ARM_CP_CONST) {
7237                     tmp = tcg_const_i32(ri->resetvalue);
7238                 } else if (ri->readfn) {
7239                     TCGv_ptr tmpptr;
7240                     tmp = tcg_temp_new_i32();
7241                     tmpptr = tcg_const_ptr(ri);
7242                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7243                     tcg_temp_free_ptr(tmpptr);
7244                 } else {
7245                     tmp = load_cpu_offset(ri->fieldoffset);
7246                 }
7247                 if (rt == 15) {
7248                     /* A destination register of r15 for 32-bit loads sets
7249                      * the condition codes from the high 4 bits of the value.
7250                      */
7251                     gen_set_nzcv(tmp);
7252                     tcg_temp_free_i32(tmp);
7253                 } else {
7254                     store_reg(s, rt, tmp);
7255                 }
7256             }
7257         } else {
7258             /* Write */
7259             if (ri->type & ARM_CP_CONST) {
7260                 /* If not forbidden by access permissions, treat as WI */
7261                 return 0;
7262             }
7263
7264             if (is64) {
7265                 TCGv_i32 tmplo, tmphi;
7266                 TCGv_i64 tmp64 = tcg_temp_new_i64();
7267                 tmplo = load_reg(s, rt);
7268                 tmphi = load_reg(s, rt2);
7269                 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7270                 tcg_temp_free_i32(tmplo);
7271                 tcg_temp_free_i32(tmphi);
7272                 if (ri->writefn) {
7273                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
7274                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7275                     tcg_temp_free_ptr(tmpptr);
7276                 } else {
7277                     tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7278                 }
7279                 tcg_temp_free_i64(tmp64);
7280             } else {
7281                 if (ri->writefn) {
7282                     TCGv_i32 tmp;
7283                     TCGv_ptr tmpptr;
7284                     tmp = load_reg(s, rt);
7285                     tmpptr = tcg_const_ptr(ri);
7286                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7287                     tcg_temp_free_ptr(tmpptr);
7288                     tcg_temp_free_i32(tmp);
7289                 } else {
7290                     TCGv_i32 tmp = load_reg(s, rt);
7291                     store_cpu_offset(tmp, ri->fieldoffset);
7292                 }
7293             }
7294         }
7295
7296         if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7297             /* I/O operations must end the TB here (whether read or write) */
7298             gen_io_end();
7299             gen_lookup_tb(s);
7300         } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7301             /* We default to ending the TB on a coprocessor register write,
7302              * but allow this to be suppressed by the register definition
7303              * (usually only necessary to work around guest bugs).
7304              */
7305             gen_lookup_tb(s);
7306         }
7307
7308         return 0;
7309     }
7310
7311     /* Unknown register; this might be a guest error or a QEMU
7312      * unimplemented feature.
7313      */
7314     if (is64) {
7315         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7316                       "64 bit system register cp:%d opc1: %d crm:%d "
7317                       "(%s)\n",
7318                       isread ? "read" : "write", cpnum, opc1, crm,
7319                       s->ns ? "non-secure" : "secure");
7320     } else {
7321         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7322                       "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7323                       "(%s)\n",
7324                       isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7325                       s->ns ? "non-secure" : "secure");
7326     }
7327
7328     return 1;
7329 }
7330
7331
7332 /* Store a 64-bit value to a register pair.  Clobbers val.  */
7333 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7334 {
7335     TCGv_i32 tmp;
7336     tmp = tcg_temp_new_i32();
7337     tcg_gen_trunc_i64_i32(tmp, val);
7338     store_reg(s, rlow, tmp);
7339     tmp = tcg_temp_new_i32();
7340     tcg_gen_shri_i64(val, val, 32);
7341     tcg_gen_trunc_i64_i32(tmp, val);
7342     store_reg(s, rhigh, tmp);
7343 }
7344
7345 /* load a 32-bit value from a register and perform a 64-bit accumulate.  */
7346 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
7347 {
7348     TCGv_i64 tmp;
7349     TCGv_i32 tmp2;
7350
7351     /* Load value and extend to 64 bits.  */
7352     tmp = tcg_temp_new_i64();
7353     tmp2 = load_reg(s, rlow);
7354     tcg_gen_extu_i32_i64(tmp, tmp2);
7355     tcg_temp_free_i32(tmp2);
7356     tcg_gen_add_i64(val, val, tmp);
7357     tcg_temp_free_i64(tmp);
7358 }
7359
7360 /* load and add a 64-bit value from a register pair.  */
7361 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7362 {
7363     TCGv_i64 tmp;
7364     TCGv_i32 tmpl;
7365     TCGv_i32 tmph;
7366
7367     /* Load 64-bit value rd:rn.  */
7368     tmpl = load_reg(s, rlow);
7369     tmph = load_reg(s, rhigh);
7370     tmp = tcg_temp_new_i64();
7371     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7372     tcg_temp_free_i32(tmpl);
7373     tcg_temp_free_i32(tmph);
7374     tcg_gen_add_i64(val, val, tmp);
7375     tcg_temp_free_i64(tmp);
7376 }
7377
7378 /* Set N and Z flags from hi|lo.  */
7379 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7380 {
7381     tcg_gen_mov_i32(cpu_NF, hi);
7382     tcg_gen_or_i32(cpu_ZF, lo, hi);
7383 }
7384
7385 /* Load/Store exclusive instructions are implemented by remembering
7386    the value/address loaded, and seeing if these are the same
7387    when the store is performed. This should be sufficient to implement
7388    the architecturally mandated semantics, and avoids having to monitor
7389    regular stores.
7390
7391    In system emulation mode only one CPU will be running at once, so
7392    this sequence is effectively atomic.  In user emulation mode we
7393    throw an exception and handle the atomic operation elsewhere.  */
7394 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7395                                TCGv_i32 addr, int size)
7396 {
7397     TCGv_i32 tmp = tcg_temp_new_i32();
7398
7399     s->is_ldex = true;
7400
7401     switch (size) {
7402     case 0:
7403         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7404         break;
7405     case 1:
7406         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
7407         break;
7408     case 2:
7409     case 3:
7410         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7411         break;
7412     default:
7413         abort();
7414     }
7415
7416     if (size == 3) {
7417         TCGv_i32 tmp2 = tcg_temp_new_i32();
7418         TCGv_i32 tmp3 = tcg_temp_new_i32();
7419
7420         tcg_gen_addi_i32(tmp2, addr, 4);
7421         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7422         tcg_temp_free_i32(tmp2);
7423         tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
7424         store_reg(s, rt2, tmp3);
7425     } else {
7426         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7427     }
7428
7429     store_reg(s, rt, tmp);
7430     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7431 }
7432
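     /* CLREX: clear the local exclusive monitor.  -1 can never compare
      * equal to a zero-extended 32-bit address, so any following STREX
      * is guaranteed to fail.
      */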
7433 static void gen_clrex(DisasContext *s)
7434 {
7435     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7436 }
7437
7438 #ifdef CONFIG_USER_ONLY
7439 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7440                                 TCGv_i32 addr, int size)
7441 {
7442     tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
7443     tcg_gen_movi_i32(cpu_exclusive_info,
7444                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
7445     gen_exception_internal_insn(s, 4, EXCP_STREX);
7446 }
7447 #else
7448 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7449                                 TCGv_i32 addr, int size)
7450 {
7451     TCGv_i32 tmp;
7452     TCGv_i64 val64, extaddr;
7453     TCGLabel *done_label;
7454     TCGLabel *fail_label;
7455
7456     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7457          [addr] = {Rt};
7458          {Rd} = 0;
7459        } else {
7460          {Rd} = 1;
7461        } */
7462     fail_label = gen_new_label();
7463     done_label = gen_new_label();
7464     extaddr = tcg_temp_new_i64();
7465     tcg_gen_extu_i32_i64(extaddr, addr);
7466     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7467     tcg_temp_free_i64(extaddr);
7468
7469     tmp = tcg_temp_new_i32();
7470     switch (size) {
7471     case 0:
7472         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7473         break;
7474     case 1:
7475         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
7476         break;
7477     case 2:
7478     case 3:
7479         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7480         break;
7481     default:
7482         abort();
7483     }
7484
7485     val64 = tcg_temp_new_i64();
7486     if (size == 3) {
7487         TCGv_i32 tmp2 = tcg_temp_new_i32();
7488         TCGv_i32 tmp3 = tcg_temp_new_i32();
7489         tcg_gen_addi_i32(tmp2, addr, 4);
7490         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7491         tcg_temp_free_i32(tmp2);
7492         tcg_gen_concat_i32_i64(val64, tmp, tmp3);
7493         tcg_temp_free_i32(tmp3);
7494     } else {
7495         tcg_gen_extu_i32_i64(val64, tmp);
7496     }
7497     tcg_temp_free_i32(tmp);
7498
7499     tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
7500     tcg_temp_free_i64(val64);
7501
7502     tmp = load_reg(s, rt);
7503     switch (size) {
7504     case 0:
7505         gen_aa32_st8(tmp, addr, get_mem_index(s));
7506         break;
7507     case 1:
7508         gen_aa32_st16(tmp, addr, get_mem_index(s));
7509         break;
7510     case 2:
7511     case 3:
7512         gen_aa32_st32(tmp, addr, get_mem_index(s));
7513         break;
7514     default:
7515         abort();
7516     }
7517     tcg_temp_free_i32(tmp);
7518     if (size == 3) {
7519         tcg_gen_addi_i32(addr, addr, 4);
7520         tmp = load_reg(s, rt2);
7521         gen_aa32_st32(tmp, addr, get_mem_index(s));
7522         tcg_temp_free_i32(tmp);
7523     }
7524     tcg_gen_movi_i32(cpu_R[rd], 0);
7525     tcg_gen_br(done_label);
7526     gen_set_label(fail_label);
7527     tcg_gen_movi_i32(cpu_R[rd], 1);
7528     gen_set_label(done_label);
7529     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7530 }
7531 #endif
7532
7533 /* gen_srs:
7534  * @env: CPUARMState
7535  * @s: DisasContext
7536  * @mode: mode field from insn (which stack to store to)
7537  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7538  * @writeback: true if writeback bit set
7539  *
7540  * Generate code for the SRS (Store Return State) insn.
7541  */
7542 static void gen_srs(DisasContext *s,
7543                     uint32_t mode, uint32_t amode, bool writeback)
7544 {
7545     int32_t offset;
7546     TCGv_i32 addr = tcg_temp_new_i32();
7547     TCGv_i32 tmp = tcg_const_i32(mode);
7548     gen_helper_get_r13_banked(addr, cpu_env, tmp);
7549     tcg_temp_free_i32(tmp);
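     /* This switch positions addr at the lower of the two words to be
      * stored for the addressing mode: LR is stored at addr and the SPSR
      * at addr + 4.  The writeback switch below then derives the new SP.
      */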
7550     switch (amode) {
7551     case 0: /* DA */
7552         offset = -4;
7553         break;
7554     case 1: /* IA */
7555         offset = 0;
7556         break;
7557     case 2: /* DB */
7558         offset = -8;
7559         break;
7560     case 3: /* IB */
7561         offset = 4;
7562         break;
7563     default:
7564         abort();
7565     }
7566     tcg_gen_addi_i32(addr, addr, offset);
7567     tmp = load_reg(s, 14);
7568     gen_aa32_st32(tmp, addr, get_mem_index(s));
7569     tcg_temp_free_i32(tmp);
7570     tmp = load_cpu_field(spsr);
7571     tcg_gen_addi_i32(addr, addr, 4);
7572     gen_aa32_st32(tmp, addr, get_mem_index(s));
7573     tcg_temp_free_i32(tmp);
7574     if (writeback) {
7575         switch (amode) {
7576         case 0:
7577             offset = -8;
7578             break;
7579         case 1:
7580             offset = 4;
7581             break;
7582         case 2:
7583             offset = -4;
7584             break;
7585         case 3:
7586             offset = 0;
7587             break;
7588         default:
7589             abort();
7590         }
7591         tcg_gen_addi_i32(addr, addr, offset);
7592         tmp = tcg_const_i32(mode);
7593         gen_helper_set_r13_banked(cpu_env, tmp, addr);
7594         tcg_temp_free_i32(tmp);
7595     }
7596     tcg_temp_free_i32(addr);
7597 }
7598
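     /* Decode and translate a single 32-bit ARM (A32) instruction.
      * Conditional execution is handled by branching to s->condlabel,
      * i.e. past the generated code, when the condition check fails.
      */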
7599 static void disas_arm_insn(DisasContext *s, unsigned int insn)
7600 {
7601     unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
7602     TCGv_i32 tmp;
7603     TCGv_i32 tmp2;
7604     TCGv_i32 tmp3;
7605     TCGv_i32 addr;
7606     TCGv_i64 tmp64;
7607
7608     /* M variants do not implement ARM mode.  */
7609     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7610         goto illegal_op;
7611     }
7612     cond = insn >> 28;
7613     if (cond == 0xf) {
7614         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
7615          * choose to UNDEF. In ARMv5 and above the space is used
7616          * for miscellaneous unconditional instructions.
7617          */
7618         ARCH(5);
7619
7620         /* Unconditional instructions.  */
7621         if (((insn >> 25) & 7) == 1) {
7622             /* NEON Data processing.  */
7623             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7624                 goto illegal_op;
7625             }
7626
7627             if (disas_neon_data_insn(s, insn)) {
7628                 goto illegal_op;
7629             }
7630             return;
7631         }
7632         if ((insn & 0x0f100000) == 0x04000000) {
7633             /* NEON load/store.  */
7634             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7635                 goto illegal_op;
7636             }
7637
7638             if (disas_neon_ls_insn(s, insn)) {
7639                 goto illegal_op;
7640             }
7641             return;
7642         }
7643         if ((insn & 0x0f000e10) == 0x0e000a00) {
7644             /* VFP.  */
7645             if (disas_vfp_insn(s, insn)) {
7646                 goto illegal_op;
7647             }
7648             return;
7649         }
7650         if (((insn & 0x0f30f000) == 0x0510f000) ||
7651             ((insn & 0x0f30f010) == 0x0710f000)) {
7652             if ((insn & (1 << 22)) == 0) {
7653                 /* PLDW; v7MP */
7654                 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7655                     goto illegal_op;
7656                 }
7657             }
7658             /* Otherwise PLD; v5TE+ */
7659             ARCH(5TE);
7660             return;
7661         }
7662         if (((insn & 0x0f70f000) == 0x0450f000) ||
7663             ((insn & 0x0f70f010) == 0x0650f000)) {
7664             ARCH(7);
7665             return; /* PLI; V7 */
7666         }
7667         if (((insn & 0x0f700000) == 0x04100000) ||
7668             ((insn & 0x0f700010) == 0x06100000)) {
7669             if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7670                 goto illegal_op;
7671             }
7672             return; /* v7MP: Unallocated memory hint: must NOP */
7673         }
7674
7675         if ((insn & 0x0ffffdff) == 0x01010000) {
7676             ARCH(6);
7677             /* setend */
7678             if (((insn >> 9) & 1) != s->bswap_code) {
7679                 /* Dynamic endianness switching not implemented. */
7680                 qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
7681                 goto illegal_op;
7682             }
7683             return;
7684         } else if ((insn & 0x0fffff00) == 0x057ff000) {
7685             switch ((insn >> 4) & 0xf) {
7686             case 1: /* clrex */
7687                 ARCH(6K);
7688                 gen_clrex(s);
7689                 return;
7690             case 4: /* dsb */
7691             case 5: /* dmb */
7692             case 6: /* isb */
7693                 ARCH(7);
7694                 /* We don't emulate caches, so these are no-ops.  */
7695                 return;
7696             default:
7697                 goto illegal_op;
7698             }
7699         } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
7700             /* srs */
7701             if (IS_USER(s)) {
7702                 goto illegal_op;
7703             }
7704             ARCH(6);
7705             gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
7706             return;
7707         } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
7708             /* rfe */
7709             int32_t offset;
7710             if (IS_USER(s))
7711                 goto illegal_op;
7712             ARCH(6);
7713             rn = (insn >> 16) & 0xf;
7714             addr = load_reg(s, rn);
7715             i = (insn >> 23) & 3;
7716             switch (i) {
7717             case 0: offset = -4; break; /* DA */
7718             case 1: offset = 0; break; /* IA */
7719             case 2: offset = -8; break; /* DB */
7720             case 3: offset = 4; break; /* IB */
7721             default: abort();
7722             }
7723             if (offset)
7724                 tcg_gen_addi_i32(addr, addr, offset);
7725             /* Load PC into tmp and CPSR into tmp2.  */
7726             tmp = tcg_temp_new_i32();
7727             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7728             tcg_gen_addi_i32(addr, addr, 4);
7729             tmp2 = tcg_temp_new_i32();
7730             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
7731             if (insn & (1 << 21)) {
7732                 /* Base writeback.  */
7733                 switch (i) {
7734                 case 0: offset = -8; break;
7735                 case 1: offset = 4; break;
7736                 case 2: offset = -4; break;
7737                 case 3: offset = 0; break;
7738                 default: abort();
7739                 }
7740                 if (offset)
7741                     tcg_gen_addi_i32(addr, addr, offset);
7742                 store_reg(s, rn, addr);
7743             } else {
7744                 tcg_temp_free_i32(addr);
7745             }
7746             gen_rfe(s, tmp, tmp2);
7747             return;
7748         } else if ((insn & 0x0e000000) == 0x0a000000) {
7749             /* branch link and change to thumb (blx <offset>) */
7750             int32_t offset;
7751
7752             val = (uint32_t)s->pc;
7753             tmp = tcg_temp_new_i32();
7754             tcg_gen_movi_i32(tmp, val);
7755             store_reg(s, 14, tmp);
7756             /* Sign-extend the 24-bit offset */
7757             offset = (((int32_t)insn) << 8) >> 8;
7758             /* offset * 4 + bit24 * 2 + (thumb bit) */
7759             val += (offset << 2) | ((insn >> 23) & 2) | 1;
7760             /* pipeline offset */
7761             val += 4;
7762             /* protected by ARCH(5); above, near the start of uncond block */
7763             gen_bx_im(s, val);
7764             return;
7765         } else if ((insn & 0x0e000f00) == 0x0c000100) {
7766             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7767                 /* iWMMXt register transfer.  */
7768                 if (extract32(s->c15_cpar, 1, 1)) {
7769                     if (!disas_iwmmxt_insn(s, insn)) {
7770                         return;
7771                     }
7772                 }
7773             }
7774         } else if ((insn & 0x0fe00000) == 0x0c400000) {
7775             /* Coprocessor double register transfer.  */
7776             ARCH(5TE);
7777         } else if ((insn & 0x0f000010) == 0x0e000010) {
7778             /* Additional coprocessor register transfer.  */
7779         } else if ((insn & 0x0ff10020) == 0x01000000) {
7780             uint32_t mask;
7781             uint32_t val;
7782             /* cps (privileged) */
7783             if (IS_USER(s))
7784                 return;
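             /* imod<1> (bit 19) enables changing the A/I/F mask bits, with
              * imod<0> (bit 18) selecting set (disable) vs. clear (enable);
              * bit 17 enables a mode change to the mode in bits [4:0].
              */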
7785             mask = val = 0;
7786             if (insn & (1 << 19)) {
7787                 if (insn & (1 << 8))
7788                     mask |= CPSR_A;
7789                 if (insn & (1 << 7))
7790                     mask |= CPSR_I;
7791                 if (insn & (1 << 6))
7792                     mask |= CPSR_F;
7793                 if (insn & (1 << 18))
7794                     val |= mask;
7795             }
7796             if (insn & (1 << 17)) {
7797                 mask |= CPSR_M;
7798                 val |= (insn & 0x1f);
7799             }
7800             if (mask) {
7801                 gen_set_psr_im(s, mask, 0, val);
7802             }
7803             return;
7804         }
7805         goto illegal_op;
7806     }
7807     if (cond != 0xe) {
7808         /* If the condition is not "always", generate a conditional
7809            jump to the next instruction.  */
7810         s->condlabel = gen_new_label();
7811         arm_gen_test_cc(cond ^ 1, s->condlabel);
7812         s->condjmp = 1;
7813     }
7814     if ((insn & 0x0f900000) == 0x03000000) {
7815         if ((insn & (1 << 21)) == 0) {
7816             ARCH(6T2);
7817             rd = (insn >> 12) & 0xf;
7818             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
7819             if ((insn & (1 << 22)) == 0) {
7820                 /* MOVW */
7821                 tmp = tcg_temp_new_i32();
7822                 tcg_gen_movi_i32(tmp, val);
7823             } else {
7824                 /* MOVT */
7825                 tmp = load_reg(s, rd);
7826                 tcg_gen_ext16u_i32(tmp, tmp);
7827                 tcg_gen_ori_i32(tmp, tmp, val << 16);
7828             }
7829             store_reg(s, rd, tmp);
7830         } else {
7831             if (((insn >> 12) & 0xf) != 0xf)
7832                 goto illegal_op;
7833             if (((insn >> 16) & 0xf) == 0) {
7834                 gen_nop_hint(s, insn & 0xff);
7835             } else {
7836                 /* CPSR = immediate */
7837                 val = insn & 0xff;
7838                 shift = ((insn >> 8) & 0xf) * 2;
7839                 if (shift)
7840                     val = (val >> shift) | (val << (32 - shift));
7841                 i = ((insn & (1 << 22)) != 0);
7842                 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
7843                                    i, val)) {
7844                     goto illegal_op;
7845                 }
7846             }
7847         }
7848     } else if ((insn & 0x0f900000) == 0x01000000
7849                && (insn & 0x00000090) != 0x00000090) {
7850         /* miscellaneous instructions */
7851         op1 = (insn >> 21) & 3;
7852         sh = (insn >> 4) & 0xf;
7853         rm = insn & 0xf;
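         /* Bits [7:4] (sh) select the miscellaneous operation; op1
          * (bits [22:21]) then distinguishes variants within each group.
          */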
7854         switch (sh) {
7855         case 0x0: /* move program status register */
7856             if (op1 & 1) {
7857                 /* PSR = reg */
7858                 tmp = load_reg(s, rm);
7859                 i = ((op1 & 2) != 0);
7860                 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
7861                     goto illegal_op;
7862             } else {
7863                 /* reg = PSR */
7864                 rd = (insn >> 12) & 0xf;
7865                 if (op1 & 2) {
7866                     if (IS_USER(s))
7867                         goto illegal_op;
7868                     tmp = load_cpu_field(spsr);
7869                 } else {
7870                     tmp = tcg_temp_new_i32();
7871                     gen_helper_cpsr_read(tmp, cpu_env);
7872                 }
7873                 store_reg(s, rd, tmp);
7874             }
7875             break;
7876         case 0x1:
7877             if (op1 == 1) {
7878                 /* branch/exchange thumb (bx).  */
7879                 ARCH(4T);
7880                 tmp = load_reg(s, rm);
7881                 gen_bx(s, tmp);
7882             } else if (op1 == 3) {
7883                 /* clz */
7884                 ARCH(5);
7885                 rd = (insn >> 12) & 0xf;
7886                 tmp = load_reg(s, rm);
7887                 gen_helper_clz(tmp, tmp);
7888                 store_reg(s, rd, tmp);
7889             } else {
7890                 goto illegal_op;
7891             }
7892             break;
7893         case 0x2:
7894             if (op1 == 1) {
7895                 ARCH(5J); /* bxj */
7896                 /* Trivial implementation equivalent to bx.  */
7897                 tmp = load_reg(s, rm);
7898                 gen_bx(s, tmp);
7899             } else {
7900                 goto illegal_op;
7901             }
7902             break;
7903         case 0x3:
7904             if (op1 != 1)
7905                 goto illegal_op;
7906
7907             ARCH(5);
7908             /* branch link/exchange thumb (blx) */
7909             tmp = load_reg(s, rm);
7910             tmp2 = tcg_temp_new_i32();
7911             tcg_gen_movi_i32(tmp2, s->pc);
7912             store_reg(s, 14, tmp2);
7913             gen_bx(s, tmp);
7914             break;
7915         case 0x4:
7916         {
7917             /* crc32/crc32c */
7918             uint32_t c = extract32(insn, 8, 4);
7919
7920             /* Check this CPU supports ARMv8 CRC instructions.
7921              * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
7922              * Bits 8, 10 and 11 should be zero.
7923              */
7924             if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
7925                 (c & 0xd) != 0) {
7926                 goto illegal_op;
7927             }
7928
7929             rn = extract32(insn, 16, 4);
7930             rd = extract32(insn, 12, 4);
7931
7932             tmp = load_reg(s, rn);
7933             tmp2 = load_reg(s, rm);
7934             if (op1 == 0) {
7935                 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
7936             } else if (op1 == 1) {
7937                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
7938             }
7939             tmp3 = tcg_const_i32(1 << op1);
7940             if (c & 0x2) {
7941                 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
7942             } else {
7943                 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
7944             }
7945             tcg_temp_free_i32(tmp2);
7946             tcg_temp_free_i32(tmp3);
7947             store_reg(s, rd, tmp);
7948             break;
7949         }
7950         case 0x5: /* saturating add/subtract */
7951             ARCH(5TE);
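             /* op1 bit 1 selects the doubling forms and bit 0 subtract:
              * 0 = QADD, 1 = QSUB, 2 = QDADD, 3 = QDSUB.
              */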
7952             rd = (insn >> 12) & 0xf;
7953             rn = (insn >> 16) & 0xf;
7954             tmp = load_reg(s, rm);
7955             tmp2 = load_reg(s, rn);
7956             if (op1 & 2)
7957                 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
7958             if (op1 & 1)
7959                 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
7960             else
7961                 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
7962             tcg_temp_free_i32(tmp2);
7963             store_reg(s, rd, tmp);
7964             break;
7965         case 7:
7966         {
7967             int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
7968             switch (op1) {
7969             case 1:
7970                 /* bkpt */
7971                 ARCH(5);
7972                 gen_exception_insn(s, 4, EXCP_BKPT,
7973                                    syn_aa32_bkpt(imm16, false),
7974                                    default_exception_el(s));
7975                 break;
7976             case 2:
7977                 /* Hypervisor call (v7) */
7978                 ARCH(7);
7979                 if (IS_USER(s)) {
7980                     goto illegal_op;
7981                 }
7982                 gen_hvc(s, imm16);
7983                 break;
7984             case 3:
7985                 /* Secure monitor call (v6+) */
7986                 ARCH(6K);
7987                 if (IS_USER(s)) {
7988                     goto illegal_op;
7989                 }
7990                 gen_smc(s);
7991                 break;
7992             default:
7993                 goto illegal_op;
7994             }
7995             break;
7996         }
7997         case 0x8: /* signed multiply */
7998         case 0xa:
7999         case 0xc:
8000         case 0xe:
8001             ARCH(5TE);
8002             rs = (insn >> 8) & 0xf;
8003             rn = (insn >> 12) & 0xf;
8004             rd = (insn >> 16) & 0xf;
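             /* Halfword multiplies: op1 == 1 is SMULW<y>/SMLAW<y>
              * (32 x 16, top 32 bits kept); otherwise 16 x 16 with
              * op1 == 0 -> SMLA<x><y>, op1 == 2 -> SMLAL<x><y> and
              * op1 == 3 -> SMUL<x><y>.
              */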
8005             if (op1 == 1) {
8006                 /* (32 * 16) >> 16 */
8007                 tmp = load_reg(s, rm);
8008                 tmp2 = load_reg(s, rs);
8009                 if (sh & 4)
8010                     tcg_gen_sari_i32(tmp2, tmp2, 16);
8011                 else
8012                     gen_sxth(tmp2);
8013                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8014                 tcg_gen_shri_i64(tmp64, tmp64, 16);
8015                 tmp = tcg_temp_new_i32();
8016                 tcg_gen_trunc_i64_i32(tmp, tmp64);
8017                 tcg_temp_free_i64(tmp64);
8018                 if ((sh & 2) == 0) {
8019                     tmp2 = load_reg(s, rn);
8020                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8021                     tcg_temp_free_i32(tmp2);
8022                 }
8023                 store_reg(s, rd, tmp);
8024             } else {
8025                 /* 16 * 16 */
8026                 tmp = load_reg(s, rm);
8027                 tmp2 = load_reg(s, rs);
8028                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
8029                 tcg_temp_free_i32(tmp2);
8030                 if (op1 == 2) {
8031                     tmp64 = tcg_temp_new_i64();
8032                     tcg_gen_ext_i32_i64(tmp64, tmp);
8033                     tcg_temp_free_i32(tmp);
8034                     gen_addq(s, tmp64, rn, rd);
8035                     gen_storeq_reg(s, rn, rd, tmp64);
8036                     tcg_temp_free_i64(tmp64);
8037                 } else {
8038                     if (op1 == 0) {
8039                         tmp2 = load_reg(s, rn);
8040                         gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8041                         tcg_temp_free_i32(tmp2);
8042                     }
8043                     store_reg(s, rd, tmp);
8044                 }
8045             }
8046             break;
8047         default:
8048             goto illegal_op;
8049         }
8050     } else if (((insn & 0x0e000000) == 0 &&
8051                 (insn & 0x00000090) != 0x90) ||
8052                ((insn & 0x0e000000) == (1 << 25))) {
8053         int set_cc, logic_cc, shiftop;
8054
8055         op1 = (insn >> 21) & 0xf;
8056         set_cc = (insn >> 20) & 1;
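         /* table_logic_cc marks the logical opcodes (AND, EOR, TST, TEQ,
          * ORR, MOV, BIC, MVN), whose S forms set N and Z from the result
          * and C from the shifter rather than from an ALU carry-out.
          */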
8057         logic_cc = table_logic_cc[op1] & set_cc;
8058
8059         /* data processing instruction */
8060         if (insn & (1 << 25)) {
8061             /* immediate operand */
8062             val = insn & 0xff;
8063             shift = ((insn >> 8) & 0xf) * 2;
8064             if (shift) {
8065                 val = (val >> shift) | (val << (32 - shift));
8066             }
8067             tmp2 = tcg_temp_new_i32();
8068             tcg_gen_movi_i32(tmp2, val);
8069             if (logic_cc && shift) {
8070                 gen_set_CF_bit31(tmp2);
8071             }
8072         } else {
8073             /* register */
8074             rm = (insn) & 0xf;
8075             tmp2 = load_reg(s, rm);
8076             shiftop = (insn >> 5) & 3;
8077             if (!(insn & (1 << 4))) {
8078                 shift = (insn >> 7) & 0x1f;
8079                 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8080             } else {
8081                 rs = (insn >> 8) & 0xf;
8082                 tmp = load_reg(s, rs);
8083                 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
8084             }
8085         }
8086         if (op1 != 0x0f && op1 != 0x0d) {
8087             rn = (insn >> 16) & 0xf;
8088             tmp = load_reg(s, rn);
8089         } else {
8090             TCGV_UNUSED_I32(tmp);
8091         }
8092         rd = (insn >> 12) & 0xf;
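         /* op1 is the standard ARM data-processing opcode:
          * 0=AND 1=EOR 2=SUB 3=RSB 4=ADD 5=ADC 6=SBC 7=RSC
          * 8=TST 9=TEQ a=CMP b=CMN c=ORR d=MOV e=BIC f=MVN
          */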
8093         switch (op1) {
8094         case 0x00:
8095             tcg_gen_and_i32(tmp, tmp, tmp2);
8096             if (logic_cc) {
8097                 gen_logic_CC(tmp);
8098             }
8099             store_reg_bx(s, rd, tmp);
8100             break;
8101         case 0x01:
8102             tcg_gen_xor_i32(tmp, tmp, tmp2);
8103             if (logic_cc) {
8104                 gen_logic_CC(tmp);
8105             }
8106             store_reg_bx(s, rd, tmp);
8107             break;
8108         case 0x02:
8109             if (set_cc && rd == 15) {
8110                 /* SUBS r15, ... is used for exception return.  */
8111                 if (IS_USER(s)) {
8112                     goto illegal_op;
8113                 }
8114                 gen_sub_CC(tmp, tmp, tmp2);
8115                 gen_exception_return(s, tmp);
8116             } else {
8117                 if (set_cc) {
8118                     gen_sub_CC(tmp, tmp, tmp2);
8119                 } else {
8120                     tcg_gen_sub_i32(tmp, tmp, tmp2);
8121                 }
8122                 store_reg_bx(s, rd, tmp);
8123             }
8124             break;
8125         case 0x03:
8126             if (set_cc) {
8127                 gen_sub_CC(tmp, tmp2, tmp);
8128             } else {
8129                 tcg_gen_sub_i32(tmp, tmp2, tmp);
8130             }
8131             store_reg_bx(s, rd, tmp);
8132             break;
8133         case 0x04:
8134             if (set_cc) {
8135                 gen_add_CC(tmp, tmp, tmp2);
8136             } else {
8137                 tcg_gen_add_i32(tmp, tmp, tmp2);
8138             }
8139             store_reg_bx(s, rd, tmp);
8140             break;
8141         case 0x05:
8142             if (set_cc) {
8143                 gen_adc_CC(tmp, tmp, tmp2);
8144             } else {
8145                 gen_add_carry(tmp, tmp, tmp2);
8146             }
8147             store_reg_bx(s, rd, tmp);
8148             break;
8149         case 0x06:
8150             if (set_cc) {
8151                 gen_sbc_CC(tmp, tmp, tmp2);
8152             } else {
8153                 gen_sub_carry(tmp, tmp, tmp2);
8154             }
8155             store_reg_bx(s, rd, tmp);
8156             break;
8157         case 0x07:
8158             if (set_cc) {
8159                 gen_sbc_CC(tmp, tmp2, tmp);
8160             } else {
8161                 gen_sub_carry(tmp, tmp2, tmp);
8162             }
8163             store_reg_bx(s, rd, tmp);
8164             break;
8165         case 0x08:
8166             if (set_cc) {
8167                 tcg_gen_and_i32(tmp, tmp, tmp2);
8168                 gen_logic_CC(tmp);
8169             }
8170             tcg_temp_free_i32(tmp);
8171             break;
8172         case 0x09:
8173             if (set_cc) {
8174                 tcg_gen_xor_i32(tmp, tmp, tmp2);
8175                 gen_logic_CC(tmp);
8176             }
8177             tcg_temp_free_i32(tmp);
8178             break;
8179         case 0x0a:
8180             if (set_cc) {
8181                 gen_sub_CC(tmp, tmp, tmp2);
8182             }
8183             tcg_temp_free_i32(tmp);
8184             break;
8185         case 0x0b:
8186             if (set_cc) {
8187                 gen_add_CC(tmp, tmp, tmp2);
8188             }
8189             tcg_temp_free_i32(tmp);
8190             break;
8191         case 0x0c:
8192             tcg_gen_or_i32(tmp, tmp, tmp2);
8193             if (logic_cc) {
8194                 gen_logic_CC(tmp);
8195             }
8196             store_reg_bx(s, rd, tmp);
8197             break;
8198         case 0x0d:
8199             if (logic_cc && rd == 15) {
8200                 /* MOVS r15, ... is used for exception return.  */
8201                 if (IS_USER(s)) {
8202                     goto illegal_op;
8203                 }
8204                 gen_exception_return(s, tmp2);
8205             } else {
8206                 if (logic_cc) {
8207                     gen_logic_CC(tmp2);
8208                 }
8209                 store_reg_bx(s, rd, tmp2);
8210             }
8211             break;
8212         case 0x0e:
8213             tcg_gen_andc_i32(tmp, tmp, tmp2);
8214             if (logic_cc) {
8215                 gen_logic_CC(tmp);
8216             }
8217             store_reg_bx(s, rd, tmp);
8218             break;
8219         default:
8220         case 0x0f:
8221             tcg_gen_not_i32(tmp2, tmp2);
8222             if (logic_cc) {
8223                 gen_logic_CC(tmp2);
8224             }
8225             store_reg_bx(s, rd, tmp2);
8226             break;
8227         }
8228         if (op1 != 0x0f && op1 != 0x0d) {
8229             tcg_temp_free_i32(tmp2);
8230         }
8231     } else {
8232         /* other instructions */
8233         op1 = (insn >> 24) & 0xf;
8234         switch (op1) {
8235         case 0x0:
8236         case 0x1:
8237             /* multiplies, extra load/stores */
8238             sh = (insn >> 5) & 3;
8239             if (sh == 0) {
8240                 if (op1 == 0x0) {
8241                     rd = (insn >> 16) & 0xf;
8242                     rn = (insn >> 12) & 0xf;
8243                     rs = (insn >> 8) & 0xf;
8244                     rm = (insn) & 0xf;
8245                     op1 = (insn >> 20) & 0xf;
8246                     switch (op1) {
8247                     case 0: case 1: case 2: case 3: case 6:
8248                         /* 32 bit mul */
8249                         tmp = load_reg(s, rs);
8250                         tmp2 = load_reg(s, rm);
8251                         tcg_gen_mul_i32(tmp, tmp, tmp2);
8252                         tcg_temp_free_i32(tmp2);
8253                         if (insn & (1 << 22)) {
8254                             /* Subtract (mls) */
8255                             ARCH(6T2);
8256                             tmp2 = load_reg(s, rn);
8257                             tcg_gen_sub_i32(tmp, tmp2, tmp);
8258                             tcg_temp_free_i32(tmp2);
8259                         } else if (insn & (1 << 21)) {
8260                             /* Add */
8261                             tmp2 = load_reg(s, rn);
8262                             tcg_gen_add_i32(tmp, tmp, tmp2);
8263                             tcg_temp_free_i32(tmp2);
8264                         }
8265                         if (insn & (1 << 20))
8266                             gen_logic_CC(tmp);
8267                         store_reg(s, rd, tmp);
8268                         break;
8269                     case 4:
8270                         /* 64 bit mul double accumulate (UMAAL) */
8271                         ARCH(6);
8272                         tmp = load_reg(s, rs);
8273                         tmp2 = load_reg(s, rm);
8274                         tmp64 = gen_mulu_i64_i32(tmp, tmp2);
8275                         gen_addq_lo(s, tmp64, rn);
8276                         gen_addq_lo(s, tmp64, rd);
8277                         gen_storeq_reg(s, rn, rd, tmp64);
8278                         tcg_temp_free_i64(tmp64);
8279                         break;
8280                     case 8: case 9: case 10: case 11:
8281                     case 12: case 13: case 14: case 15:
8282                         /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
8283                         tmp = load_reg(s, rs);
8284                         tmp2 = load_reg(s, rm);
8285                         if (insn & (1 << 22)) {
8286                             tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
8287                         } else {
8288                             tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
8289                         }
8290                         if (insn & (1 << 21)) { /* mult accumulate */
8291                             TCGv_i32 al = load_reg(s, rn);
8292                             TCGv_i32 ah = load_reg(s, rd);
8293                             tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
8294                             tcg_temp_free_i32(al);
8295                             tcg_temp_free_i32(ah);
8296                         }
8297                         if (insn & (1 << 20)) {
8298                             gen_logicq_cc(tmp, tmp2);
8299                         }
8300                         store_reg(s, rn, tmp);
8301                         store_reg(s, rd, tmp2);
8302                         break;
8303                     default:
8304                         goto illegal_op;
8305                     }
8306                 } else {
8307                     rn = (insn >> 16) & 0xf;
8308                     rd = (insn >> 12) & 0xf;
8309                     if (insn & (1 << 23)) {
8310                         /* load/store exclusive */
8311                         int op2 = (insn >> 8) & 3;
8312                         op1 = (insn >> 21) & 0x3;
8313
8314                         switch (op2) {
8315                         case 0: /* lda/stl */
8316                             if (op1 == 1) {
8317                                 goto illegal_op;
8318                             }
8319                             ARCH(8);
8320                             break;
8321                         case 1: /* reserved */
8322                             goto illegal_op;
8323                         case 2: /* ldaex/stlex */
8324                             ARCH(8);
8325                             break;
8326                         case 3: /* ldrex/strex */
8327                             if (op1) {
8328                                 ARCH(6K);
8329                             } else {
8330                                 ARCH(6);
8331                             }
8332                             break;
8333                         }
8334
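                         /* addr must be a local temp: gen_store_exclusive()
                          * emits branches, and an ordinary TCG temp does not
                          * keep its value across a branch.
                          */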
8335                         addr = tcg_temp_local_new_i32();
8336                         load_reg_var(s, addr, rn);
8337
8338                         /* Since the emulation does not have barriers,
8339                            the acquire/release semantics need no special
8340                            handling.  */
8341                         if (op2 == 0) {
8342                             if (insn & (1 << 20)) {
8343                                 tmp = tcg_temp_new_i32();
8344                                 switch (op1) {
8345                                 case 0: /* lda */
8346                                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8347                                     break;
8348                                 case 2: /* ldab */
8349                                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
8350                                     break;
8351                                 case 3: /* ldah */
8352                                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8353                                     break;
8354                                 default:
8355                                     abort();
8356                                 }
8357                                 store_reg(s, rd, tmp);
8358                             } else {
8359                                 rm = insn & 0xf;
8360                                 tmp = load_reg(s, rm);
8361                                 switch (op1) {
8362                                 case 0: /* stl */
8363                                     gen_aa32_st32(tmp, addr, get_mem_index(s));
8364                                     break;
8365                                 case 2: /* stlb */
8366                                     gen_aa32_st8(tmp, addr, get_mem_index(s));
8367                                     break;
8368                                 case 3: /* stlh */
8369                                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8370                                     break;
8371                                 default:
8372                                     abort();
8373                                 }
8374                                 tcg_temp_free_i32(tmp);
8375                             }
8376                         } else if (insn & (1 << 20)) {
8377                             switch (op1) {
8378                             case 0: /* ldrex */
8379                                 gen_load_exclusive(s, rd, 15, addr, 2);
8380                                 break;
8381                             case 1: /* ldrexd */
8382                                 gen_load_exclusive(s, rd, rd + 1, addr, 3);
8383                                 break;
8384                             case 2: /* ldrexb */
8385                                 gen_load_exclusive(s, rd, 15, addr, 0);
8386                                 break;
8387                             case 3: /* ldrexh */
8388                                 gen_load_exclusive(s, rd, 15, addr, 1);
8389                                 break;
8390                             default:
8391                                 abort();
8392                             }
8393                         } else {
8394                             rm = insn & 0xf;
8395                             switch (op1) {
8396                             case 0:  /*  strex */
8397                                 gen_store_exclusive(s, rd, rm, 15, addr, 2);
8398                                 break;
8399                             case 1: /*  strexd */
8400                                 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
8401                                 break;
8402                             case 2: /*  strexb */
8403                                 gen_store_exclusive(s, rd, rm, 15, addr, 0);
8404                                 break;
8405                             case 3: /* strexh */
8406                                 gen_store_exclusive(s, rd, rm, 15, addr, 1);
8407                                 break;
8408                             default:
8409                                 abort();
8410                             }
8411                         }
8412                         tcg_temp_free_i32(addr);
8413                     } else {
8414                         /* SWP instruction */
8415                         rm = (insn) & 0xf;
8416
8417                         /* ??? This is not really atomic.  However we know
8418                            we never have multiple CPUs running in parallel,
8419                            so it is good enough.  */
8420                         addr = load_reg(s, rn);
8421                         tmp = load_reg(s, rm);
8422                         tmp2 = tcg_temp_new_i32();
8423                         if (insn & (1 << 22)) {
8424                             gen_aa32_ld8u(tmp2, addr, get_mem_index(s));
8425                             gen_aa32_st8(tmp, addr, get_mem_index(s));
8426                         } else {
8427                             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
8428                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8429                         }
8430                         tcg_temp_free_i32(tmp);
8431                         tcg_temp_free_i32(addr);
8432                         store_reg(s, rd, tmp2);
8433                     }
8434                 }
8435             } else {
8436                 int address_offset;
8437                 bool load = insn & (1 << 20);
8438                 bool doubleword = false;
8439                 /* Misc load/store */
8440                 rn = (insn >> 16) & 0xf;
8441                 rd = (insn >> 12) & 0xf;
8442
8443                 if (!load && (sh & 2)) {
8444                     /* doubleword */
8445                     ARCH(5TE);
8446                     if (rd & 1) {
8447                         /* UNPREDICTABLE; we choose to UNDEF */
8448                         goto illegal_op;
8449                     }
8450                     load = (sh & 1) == 0; /* LDRD if sh == 2, STRD if sh == 3 */
8451                     doubleword = true;
8452                 }
8453
8454                 addr = load_reg(s, rn);
8455                 if (insn & (1 << 24))
8456                     gen_add_datah_offset(s, insn, 0, addr);
8457                 address_offset = 0;
8458
8459                 if (doubleword) {
8460                     if (!load) {
8461                         /* store */
8462                         tmp = load_reg(s, rd);
8463                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8464                         tcg_temp_free_i32(tmp);
8465                         tcg_gen_addi_i32(addr, addr, 4);
8466                         tmp = load_reg(s, rd + 1);
8467                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8468                         tcg_temp_free_i32(tmp);
8469                     } else {
8470                         /* load */
8471                         tmp = tcg_temp_new_i32();
8472                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8473                         store_reg(s, rd, tmp);
8474                         tcg_gen_addi_i32(addr, addr, 4);
8475                         tmp = tcg_temp_new_i32();
8476                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8477                         rd++;
8478                     }
8479                     address_offset = -4;
8480                 } else if (load) {
8481                     /* load */
8482                     tmp = tcg_temp_new_i32();
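                     /* sh selects the load type: 1 = LDRH (unsigned
                      * halfword), 2 = LDRSB (signed byte), 3 = LDRSH
                      * (signed halfword).
                      */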
8483                     switch (sh) {
8484                     case 1:
8485                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8486                         break;
8487                     case 2:
8488                         gen_aa32_ld8s(tmp, addr, get_mem_index(s));
8489                         break;
8490                     default:
8491                     case 3:
8492                         gen_aa32_ld16s(tmp, addr, get_mem_index(s));
8493                         break;
8494                     }
8495                 } else {
8496                     /* store: only STRH can reach here */
8497                     tmp = load_reg(s, rd);
8498                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8499                     tcg_temp_free_i32(tmp);
8500                 }
8501                 /* Perform base writeback before the loaded value to
8502                    ensure correct behavior with overlapping index registers.
8503                    ldrd with base writeback is undefined if the
8504                    destination and index registers overlap.  */
8505                 if (!(insn & (1 << 24))) {
8506                     gen_add_datah_offset(s, insn, address_offset, addr);
8507                     store_reg(s, rn, addr);
8508                 } else if (insn & (1 << 21)) {
8509                     if (address_offset)
8510                         tcg_gen_addi_i32(addr, addr, address_offset);
8511                     store_reg(s, rn, addr);
8512                 } else {
8513                     tcg_temp_free_i32(addr);
8514                 }
8515                 if (load) {
8516                     /* Complete the load.  */
8517                     store_reg(s, rd, tmp);
8518                 }
8519             }
8520             break;
8521         case 0x4:
8522         case 0x5:
8523             goto do_ldst;
8524         case 0x6:
8525         case 0x7:
8526             if (insn & (1 << 4)) {
8527                 ARCH(6);
8528                 /* ARMv6 Media instructions.  */
8529                 rm = insn & 0xf;
8530                 rn = (insn >> 16) & 0xf;
8531                 rd = (insn >> 12) & 0xf;
8532                 rs = (insn >> 8) & 0xf;
8533                 switch ((insn >> 23) & 3) {
8534                 case 0: /* Parallel add/subtract.  */
8535                     op1 = (insn >> 20) & 7;
8536                     tmp = load_reg(s, rn);
8537                     tmp2 = load_reg(s, rm);
8538                     sh = (insn >> 5) & 7;
8539                     if ((op1 & 3) == 0 || sh == 5 || sh == 6)
8540                         goto illegal_op;
8541                     gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
8542                     tcg_temp_free_i32(tmp2);
8543                     store_reg(s, rd, tmp);
8544                     break;
8545                 case 1:
8546                     if ((insn & 0x00700020) == 0) {
8547                         /* Halfword pack.  */
8548                         tmp = load_reg(s, rn);
8549                         tmp2 = load_reg(s, rm);
8550                         shift = (insn >> 7) & 0x1f;
8551                         if (insn & (1 << 6)) {
8552                             /* pkhtb */
8553                             if (shift == 0)
8554                                 shift = 31;
8555                             tcg_gen_sari_i32(tmp2, tmp2, shift);
8556                             tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8557                             tcg_gen_ext16u_i32(tmp2, tmp2);
8558                         } else {
8559                             /* pkhbt */
8560                             if (shift)
8561                                 tcg_gen_shli_i32(tmp2, tmp2, shift);
8562                             tcg_gen_ext16u_i32(tmp, tmp);
8563                             tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8564                         }
8565                         tcg_gen_or_i32(tmp, tmp, tmp2);
8566                         tcg_temp_free_i32(tmp2);
8567                         store_reg(s, rd, tmp);
8568                     } else if ((insn & 0x00200020) == 0x00200000) {
8569                         /* [us]sat */
8570                         tmp = load_reg(s, rm);
8571                         shift = (insn >> 7) & 0x1f;
8572                         if (insn & (1 << 6)) {
8573                             if (shift == 0)
8574                                 shift = 31;
8575                             tcg_gen_sari_i32(tmp, tmp, shift);
8576                         } else {
8577                             tcg_gen_shli_i32(tmp, tmp, shift);
8578                         }
8579                         sh = (insn >> 16) & 0x1f;
8580                         tmp2 = tcg_const_i32(sh);
8581                         if (insn & (1 << 22))
8582                             gen_helper_usat(tmp, cpu_env, tmp, tmp2);
8583                         else
8584                             gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
8585                         tcg_temp_free_i32(tmp2);
8586                         store_reg(s, rd, tmp);
8587                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
8588                         /* [us]sat16 */
8589                         tmp = load_reg(s, rm);
8590                         sh = (insn >> 16) & 0x1f;
8591                         tmp2 = tcg_const_i32(sh);
8592                         if (insn & (1 << 22))
8593                             gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
8594                         else
8595                             gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
8596                         tcg_temp_free_i32(tmp2);
8597                         store_reg(s, rd, tmp);
8598                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
8599                         /* Select bytes.  */
8600                         tmp = load_reg(s, rn);
8601                         tmp2 = load_reg(s, rm);
8602                         tmp3 = tcg_temp_new_i32();
8603                         tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
8604                         gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8605                         tcg_temp_free_i32(tmp3);
8606                         tcg_temp_free_i32(tmp2);
8607                         store_reg(s, rd, tmp);
8608                     } else if ((insn & 0x000003e0) == 0x00000060) {
8609                         tmp = load_reg(s, rm);
8610                         shift = (insn >> 10) & 3;
8611                         /* ??? In many cases it's not necessary to do a
8612                            rotate, a shift is sufficient.  */
8613                         if (shift != 0)
8614                             tcg_gen_rotri_i32(tmp, tmp, shift * 8);
8615                         op1 = (insn >> 20) & 7;
8616                         switch (op1) {
8617                         case 0: gen_sxtb16(tmp);  break;
8618                         case 2: gen_sxtb(tmp);    break;
8619                         case 3: gen_sxth(tmp);    break;
8620                         case 4: gen_uxtb16(tmp);  break;
8621                         case 6: gen_uxtb(tmp);    break;
8622                         case 7: gen_uxth(tmp);    break;
8623                         default: goto illegal_op;
8624                         }
8625                         if (rn != 15) {
8626                             tmp2 = load_reg(s, rn);
8627                             if ((op1 & 3) == 0) {
8628                                 gen_add16(tmp, tmp2);
8629                             } else {
8630                                 tcg_gen_add_i32(tmp, tmp, tmp2);
8631                                 tcg_temp_free_i32(tmp2);
8632                             }
8633                         }
8634                         store_reg(s, rd, tmp);
8635                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
8636                         /* rev */
8637                         tmp = load_reg(s, rm);
8638                         if (insn & (1 << 22)) {
8639                             if (insn & (1 << 7)) {
8640                                 gen_revsh(tmp);
8641                             } else {
8642                                 ARCH(6T2);
8643                                 gen_helper_rbit(tmp, tmp);
8644                             }
8645                         } else {
8646                             if (insn & (1 << 7))
8647                                 gen_rev16(tmp);
8648                             else
8649                                 tcg_gen_bswap32_i32(tmp, tmp);
8650                         }
8651                         store_reg(s, rd, tmp);
8652                     } else {
8653                         goto illegal_op;
8654                     }
8655                     break;
8656                 case 2: /* Multiplies (Type 3).  */
8657                     switch ((insn >> 20) & 0x7) {
8658                     case 5:
8659                         if (((insn >> 6) ^ (insn >> 7)) & 1) {
8660                             /* op2 not 00x or 11x : UNDEF */
8661                             goto illegal_op;
8662                         }
8663                         /* Signed multiply most significant [accumulate].
8664                            (SMMUL, SMMLA, SMMLS) */
8665                         tmp = load_reg(s, rm);
8666                         tmp2 = load_reg(s, rs);
8667                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
8668
8669                         if (rd != 15) {
8670                             tmp = load_reg(s, rd);
8671                             if (insn & (1 << 6)) {
8672                                 tmp64 = gen_subq_msw(tmp64, tmp);
8673                             } else {
8674                                 tmp64 = gen_addq_msw(tmp64, tmp);
8675                             }
8676                         }
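                             /* Bit 5 set selects the rounding variants
                              * (SMMULR/SMMLAR/SMMLSR): adding 0x80000000
                              * before taking the high word rounds the result
                              * to nearest.
                              */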
8677                         if (insn & (1 << 5)) {
8678                             tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8679                         }
8680                         tcg_gen_shri_i64(tmp64, tmp64, 32);
8681                         tmp = tcg_temp_new_i32();
8682                         tcg_gen_trunc_i64_i32(tmp, tmp64);
8683                         tcg_temp_free_i64(tmp64);
8684                         store_reg(s, rn, tmp);
8685                         break;
8686                     case 0:
8687                     case 4:
8688                         /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
8689                         if (insn & (1 << 7)) {
8690                             goto illegal_op;
8691                         }
8692                         tmp = load_reg(s, rm);
8693                         tmp2 = load_reg(s, rs);
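                             /* The M bit (insn bit 5) selects the "X" forms,
                              * which swap the halfwords of the second operand
                              * before the dual 16x16 multiplies.
                              */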
8694                         if (insn & (1 << 5))
8695                             gen_swap_half(tmp2);
8696                         gen_smul_dual(tmp, tmp2);
8697                         if (insn & (1 << 22)) {
8698                             /* smlald, smlsld */
8699                             TCGv_i64 tmp64_2;
8700
8701                             tmp64 = tcg_temp_new_i64();
8702                             tmp64_2 = tcg_temp_new_i64();
8703                             tcg_gen_ext_i32_i64(tmp64, tmp);
8704                             tcg_gen_ext_i32_i64(tmp64_2, tmp2);
8705                             tcg_temp_free_i32(tmp);
8706                             tcg_temp_free_i32(tmp2);
8707                             if (insn & (1 << 6)) {
8708                                 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
8709                             } else {
8710                                 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
8711                             }
8712                             tcg_temp_free_i64(tmp64_2);
8713                             gen_addq(s, tmp64, rd, rn);
8714                             gen_storeq_reg(s, rd, rn, tmp64);
8715                             tcg_temp_free_i64(tmp64);
8716                         } else {
8717                             /* smuad, smusd, smlad, smlsd */
8718                             if (insn & (1 << 6)) {
8719                                 /* This subtraction cannot overflow. */
8720                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
8721                             } else {
8722                                 /* This addition cannot overflow 32 bits;
8723                                  * however it may overflow considered as a
8724                                  * signed operation, in which case we must set
8725                                  * the Q flag.
8726                                  */
8727                                 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8728                             }
8729                             tcg_temp_free_i32(tmp2);
8730                             if (rd != 15) {
8731                                 tmp2 = load_reg(s, rd);
8732                                 gen_helper_add_setq(tmp, cpu_env,
8733                                                     tmp, tmp2);
8734                                 tcg_temp_free_i32(tmp2);
8735                             }
8736                             store_reg(s, rn, tmp);
8737                         }
8738                         break;
8739                     case 1:
8740                     case 3:
8741                         /* SDIV, UDIV */
8742                         if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
8743                             goto illegal_op;
8744                         }
8745                         if (((insn >> 5) & 7) || (rd != 15)) {
8746                             goto illegal_op;
8747                         }
8748                         tmp = load_reg(s, rm);
8749                         tmp2 = load_reg(s, rs);
8750                         if (insn & (1 << 21)) {
8751                             gen_helper_udiv(tmp, tmp, tmp2);
8752                         } else {
8753                             gen_helper_sdiv(tmp, tmp, tmp2);
8754                         }
8755                         tcg_temp_free_i32(tmp2);
8756                         store_reg(s, rn, tmp);
8757                         break;
8758                     default:
8759                         goto illegal_op;
8760                     }
8761                     break;
8762                 case 3:
8763                     op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
8764                     switch (op1) {
8765                     case 0: /* Unsigned sum of absolute differences.  */
8766                         ARCH(6);
8767                         tmp = load_reg(s, rm);
8768                         tmp2 = load_reg(s, rs);
8769                         gen_helper_usad8(tmp, tmp, tmp2);
8770                         tcg_temp_free_i32(tmp2);
8771                         if (rd != 15) {
8772                             tmp2 = load_reg(s, rd);
8773                             tcg_gen_add_i32(tmp, tmp, tmp2);
8774                             tcg_temp_free_i32(tmp2);
8775                         }
8776                         store_reg(s, rn, tmp);
8777                         break;
8778                     case 0x20: case 0x24: case 0x28: case 0x2c:
8779                         /* Bitfield insert/clear.  */
8780                         ARCH(6T2);
8781                         shift = (insn >> 7) & 0x1f;
8782                         i = (insn >> 16) & 0x1f;
8783                         if (i < shift) {
8784                             /* UNPREDICTABLE; we choose to UNDEF */
8785                             goto illegal_op;
8786                         }
8787                         i = i + 1 - shift;
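                             /* i is now the field width in bits; rm == 15
                              * encodes BFC, i.e. inserting a field of zeroes.
                              */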
8788                         if (rm == 15) {
8789                             tmp = tcg_temp_new_i32();
8790                             tcg_gen_movi_i32(tmp, 0);
8791                         } else {
8792                             tmp = load_reg(s, rm);
8793                         }
8794                         if (i != 32) {
8795                             tmp2 = load_reg(s, rd);
8796                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
8797                             tcg_temp_free_i32(tmp2);
8798                         }
8799                         store_reg(s, rd, tmp);
8800                         break;
8801                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
8802                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
8803                         ARCH(6T2);
8804                         tmp = load_reg(s, rm);
8805                         shift = (insn >> 7) & 0x1f;
8806                         i = ((insn >> 16) & 0x1f) + 1;
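                             /* shift is the field's LSB and i its width; a
                              * field running past bit 31 is UNPREDICTABLE and
                              * is treated as UNDEF here.
                              */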
8807                         if (shift + i > 32)
8808                             goto illegal_op;
8809                         if (i < 32) {
8810                             if (op1 & 0x20) {
8811                                 gen_ubfx(tmp, shift, (1u << i) - 1);
8812                             } else {
8813                                 gen_sbfx(tmp, shift, i);
8814                             }
8815                         }
8816                         store_reg(s, rd, tmp);
8817                         break;
8818                     default:
8819                         goto illegal_op;
8820                     }
8821                     break;
8822                 }
8823                 break;
8824             }
8825         do_ldst:
8826             /* Check for undefined extension instructions
8827              * per the ARM Bible, i.e.:
8828              * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
8829              */
8830             sh = (0xf << 20) | (0xf << 4);
8831             if (op1 == 0x7 && ((insn & sh) == sh)) {
8832                 goto illegal_op;
8833             }
8834
8835             /* load/store byte/word */
8836             rn = (insn >> 16) & 0xf;
8837             rd = (insn >> 12) & 0xf;
8838             tmp2 = load_reg(s, rn);
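                 /* P == 0 with W == 1 (bits 24/21) selects the unprivileged
                  * LDRT/STRT forms, which must be translated with the
                  * user-mode MMU index even from privileged modes.
                  */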
8839             if ((insn & 0x01200000) == 0x00200000) {
8840                 /* ldrt/strt */
8841                 i = get_a32_user_mem_index(s);
8842             } else {
8843                 i = get_mem_index(s);
8844             }
8845             if (insn & (1 << 24))
8846                 gen_add_data_offset(s, insn, tmp2);
8847             if (insn & (1 << 20)) {
8848                 /* load */
8849                 tmp = tcg_temp_new_i32();
8850                 if (insn & (1 << 22)) {
8851                     gen_aa32_ld8u(tmp, tmp2, i);
8852                 } else {
8853                     gen_aa32_ld32u(tmp, tmp2, i);
8854                 }
8855             } else {
8856                 /* store */
8857                 tmp = load_reg(s, rd);
8858                 if (insn & (1 << 22)) {
8859                     gen_aa32_st8(tmp, tmp2, i);
8860                 } else {
8861                     gen_aa32_st32(tmp, tmp2, i);
8862                 }
8863                 tcg_temp_free_i32(tmp);
8864             }
8865             if (!(insn & (1 << 24))) {
8866                 gen_add_data_offset(s, insn, tmp2);
8867                 store_reg(s, rn, tmp2);
8868             } else if (insn & (1 << 21)) {
8869                 store_reg(s, rn, tmp2);
8870             } else {
8871                 tcg_temp_free_i32(tmp2);
8872             }
8873             if (insn & (1 << 20)) {
8874                 /* Complete the load.  */
8875                 store_reg_from_load(s, rd, tmp);
8876             }
8877             break;
8878         case 0x08:
8879         case 0x09:
8880             {
8881                 int j, n, loaded_base;
8882                 bool exc_return = false;
8883                 bool is_load = extract32(insn, 20, 1);
8884                 bool user = false;
8885                 TCGv_i32 loaded_var;
8886                 /* load/store multiple words */
8887                 /* XXX: store correct base if write back */
8888                 if (insn & (1 << 22)) {
8889                     /* LDM (user), LDM (exception return) and STM (user) */
8890                     if (IS_USER(s))
8891                         goto illegal_op; /* only usable in supervisor mode */
8892
8893                     if (is_load && extract32(insn, 15, 1)) {
8894                         exc_return = true;
8895                     } else {
8896                         user = true;
8897                     }
8898                 }
8899                 rn = (insn >> 16) & 0xf;
8900                 addr = load_reg(s, rn);
8901
8902                 /* compute total size */
8903                 loaded_base = 0;
8904                 TCGV_UNUSED_I32(loaded_var);
8905                 n = 0;
8906                 for (i = 0; i < 16; i++) {
8907                     if (insn & (1 << i))
8908                         n++;
8909                 }
8910                 /* XXX: test invalid n == 0 case ? */
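                     /* Normalise the four addressing modes (IA/IB/DA/DB) to a
                      * single ascending loop: bias the start address so every
                      * word transfers at an increasing address, then repair
                      * the base during writeback further down.
                      */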
8911                 if (insn & (1 << 23)) {
8912                     if (insn & (1 << 24)) {
8913                         /* pre increment */
8914                         tcg_gen_addi_i32(addr, addr, 4);
8915                     } else {
8916                         /* post increment */
8917                     }
8918                 } else {
8919                     if (insn & (1 << 24)) {
8920                         /* pre decrement */
8921                         tcg_gen_addi_i32(addr, addr, -(n * 4));
8922                     } else {
8923                         /* post decrement */
8924                         if (n != 1)
8925                             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8926                     }
8927                 }
8928                 j = 0;
8929                 for (i = 0; i < 16; i++) {
8930                     if (insn & (1 << i)) {
8931                         if (is_load) {
8932                             /* load */
8933                             tmp = tcg_temp_new_i32();
8934                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8935                             if (user) {
8936                                 tmp2 = tcg_const_i32(i);
8937                                 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
8938                                 tcg_temp_free_i32(tmp2);
8939                                 tcg_temp_free_i32(tmp);
8940                             } else if (i == rn) {
8941                                 loaded_var = tmp;
8942                                 loaded_base = 1;
8943                             } else {
8944                                 store_reg_from_load(s, i, tmp);
8945                             }
8946                         } else {
8947                             /* store */
8948                             if (i == 15) {
8949                                 /* special case: r15 = PC + 8 */
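                                     /* s->pc already points 4 bytes past
                                        this insn, so + 4 yields PC + 8.  */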
8950                                 val = (long)s->pc + 4;
8951                                 tmp = tcg_temp_new_i32();
8952                                 tcg_gen_movi_i32(tmp, val);
8953                             } else if (user) {
8954                                 tmp = tcg_temp_new_i32();
8955                                 tmp2 = tcg_const_i32(i);
8956                                 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
8957                                 tcg_temp_free_i32(tmp2);
8958                             } else {
8959                                 tmp = load_reg(s, i);
8960                             }
8961                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8962                             tcg_temp_free_i32(tmp);
8963                         }
8964                         j++;
8965                         /* no need to add after the last transfer */
8966                         if (j != n)
8967                             tcg_gen_addi_i32(addr, addr, 4);
8968                     }
8969                 }
8970                 if (insn & (1 << 21)) {
8971                     /* write back */
8972                     if (insn & (1 << 23)) {
8973                         if (insn & (1 << 24)) {
8974                             /* pre increment */
8975                         } else {
8976                             /* post increment */
8977                             tcg_gen_addi_i32(addr, addr, 4);
8978                         }
8979                     } else {
8980                         if (insn & (1 << 24)) {
8981                             /* pre decrement */
8982                             if (n != 1)
8983                                 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8984                         } else {
8985                             /* post decrement */
8986                             tcg_gen_addi_i32(addr, addr, -(n * 4));
8987                         }
8988                     }
8989                     store_reg(s, rn, addr);
8990                 } else {
8991                     tcg_temp_free_i32(addr);
8992                 }
8993                 if (loaded_base) {
8994                     store_reg(s, rn, loaded_var);
8995                 }
8996                 if (exc_return) {
8997                     /* Restore CPSR from SPSR.  */
8998                     tmp = load_cpu_field(spsr);
8999                     gen_set_cpsr(tmp, CPSR_ERET_MASK);
9000                     tcg_temp_free_i32(tmp);
9001                     s->is_jmp = DISAS_UPDATE;
9002                 }
9003             }
9004             break;
9005         case 0xa:
9006         case 0xb:
9007             {
9008                 int32_t offset;
9009
9010                 /* branch (and link) */
9011                 val = (int32_t)s->pc;
9012                 if (insn & (1 << 24)) {
9013                     tmp = tcg_temp_new_i32();
9014                     tcg_gen_movi_i32(tmp, val);
9015                     store_reg(s, 14, tmp);
9016                 }
9017                 offset = sextract32(insn << 2, 0, 26);
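                 /* The 24-bit immediate, scaled by 4 and sign-extended, is
                  * relative to PC + 8; since s->pc is only 4 past the insn
                  * we add 4 more.  E.g. imm24 == 0xfffffe gives offset -8,
                  * a branch to this insn itself.
                  */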
9018                 val += offset + 4;
9019                 gen_jmp(s, val);
9020             }
9021             break;
9022         case 0xc:
9023         case 0xd:
9024         case 0xe:
9025             if (((insn >> 8) & 0xe) == 0xa) {
9026                 /* VFP.  */
9027                 if (disas_vfp_insn(s, insn)) {
9028                     goto illegal_op;
9029                 }
9030             } else if (disas_coproc_insn(s, insn)) {
9031                 /* Coprocessor.  */
9032                 goto illegal_op;
9033             }
9034             break;
9035         case 0xf:
9036             /* swi */
9037             gen_set_pc_im(s, s->pc);
9038             s->svc_imm = extract32(insn, 0, 24);
9039             s->is_jmp = DISAS_SWI;
9040             break;
9041         default:
9042         illegal_op:
9043             gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
9044                                default_exception_el(s));
9045             break;
9046         }
9047     }
9048 }
9049
9050 /* Return true if this is a Thumb-2 logical op.  */
9051 static int
9052 thumb2_logic_op(int op)
9053 {
9054     return (op < 8);
9055 }
9056
9057 /* Generate code for a Thumb-2 data processing operation.  If CONDS is nonzero
9058    then set condition code flags based on the result of the operation.
9059    If SHIFTER_OUT is nonzero then set the carry flag for logical operations
9060    to the high bit of T1.
9061    Returns zero if the opcode is valid.  */
9062
9063 static int
9064 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
9065                    TCGv_i32 t0, TCGv_i32 t1)
9066 {
9067     int logic_cc;
9068
9069     logic_cc = 0;
9070     switch (op) {
9071     case 0: /* and */
9072         tcg_gen_and_i32(t0, t0, t1);
9073         logic_cc = conds;
9074         break;
9075     case 1: /* bic */
9076         tcg_gen_andc_i32(t0, t0, t1);
9077         logic_cc = conds;
9078         break;
9079     case 2: /* orr */
9080         tcg_gen_or_i32(t0, t0, t1);
9081         logic_cc = conds;
9082         break;
9083     case 3: /* orn */
9084         tcg_gen_orc_i32(t0, t0, t1);
9085         logic_cc = conds;
9086         break;
9087     case 4: /* eor */
9088         tcg_gen_xor_i32(t0, t0, t1);
9089         logic_cc = conds;
9090         break;
9091     case 8: /* add */
9092         if (conds)
9093             gen_add_CC(t0, t0, t1);
9094         else
9095             tcg_gen_add_i32(t0, t0, t1);
9096         break;
9097     case 10: /* adc */
9098         if (conds)
9099             gen_adc_CC(t0, t0, t1);
9100         else
9101             gen_adc(t0, t1);
9102         break;
9103     case 11: /* sbc */
9104         if (conds) {
9105             gen_sbc_CC(t0, t0, t1);
9106         } else {
9107             gen_sub_carry(t0, t0, t1);
9108         }
9109         break;
9110     case 13: /* sub */
9111         if (conds)
9112             gen_sub_CC(t0, t0, t1);
9113         else
9114             tcg_gen_sub_i32(t0, t0, t1);
9115         break;
9116     case 14: /* rsb */
9117         if (conds)
9118             gen_sub_CC(t0, t1, t0);
9119         else
9120             tcg_gen_sub_i32(t0, t1, t0);
9121         break;
9122     default: /* 5, 6, 7, 9, 12, 15. */
9123         return 1;
9124     }
9125     if (logic_cc) {
9126         gen_logic_CC(t0);
9127         if (shifter_out)
9128             gen_set_CF_bit31(t1);
9129     }
9130     return 0;
9131 }
9132
9133 /* Translate a 32-bit thumb instruction.  Returns nonzero if the instruction
9134    is not legal.  */
9135 static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw1)
9136 {
9137     uint32_t insn, imm, shift, offset;
9138     uint32_t rd, rn, rm, rs;
9139     TCGv_i32 tmp;
9140     TCGv_i32 tmp2;
9141     TCGv_i32 tmp3;
9142     TCGv_i32 addr;
9143     TCGv_i64 tmp64;
9144     int op;
9145     int shiftop;
9146     int conds;
9147     int logic_cc;
9148
9149     if (!(arm_dc_feature(s, ARM_FEATURE_THUMB2)
9150           || arm_dc_feature(s, ARM_FEATURE_M))) {
9151         /* Thumb-1 cores may need to treat bl and blx as a pair of
9152            16-bit instructions to get correct prefetch abort behavior.  */
9153         insn = insn_hw1;
9154         if ((insn & (1 << 12)) == 0) {
9155             ARCH(5);
9156             /* Second half of blx.  */
9157             offset = ((insn & 0x7ff) << 1);
9158             tmp = load_reg(s, 14);
9159             tcg_gen_addi_i32(tmp, tmp, offset);
9160             tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9161
9162             tmp2 = tcg_temp_new_i32();
9163             tcg_gen_movi_i32(tmp2, s->pc | 1);
9164             store_reg(s, 14, tmp2);
9165             gen_bx(s, tmp);
9166             return 0;
9167         }
9168         if (insn & (1 << 11)) {
9169             /* Second half of bl.  */
9170             offset = ((insn & 0x7ff) << 1) | 1;
9171             tmp = load_reg(s, 14);
9172             tcg_gen_addi_i32(tmp, tmp, offset);
9173
9174             tmp2 = tcg_temp_new_i32();
9175             tcg_gen_movi_i32(tmp2, s->pc | 1);
9176             store_reg(s, 14, tmp2);
9177             gen_bx(s, tmp);
9178             return 0;
9179         }
9180         if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
9181             /* Instruction spans a page boundary.  Implement it as two
9182                16-bit instructions in case the second half causes a
9183                prefetch abort.  */
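             /* First half of bl/blx: LR is set to the branch base (this
              * insn's address + 4) plus the sign-extended upper offset
              * (insn[10:0] << 12); the second half adds the low bits and
              * performs the branch.
              */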
9184             offset = ((int32_t)insn << 21) >> 9;
9185             tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
9186             return 0;
9187         }
9188         /* Fall through to 32-bit decode.  */
9189     }
9190
9191     insn = arm_lduw_code(env, s->pc, s->bswap_code);
9192     s->pc += 2;
9193     insn |= (uint32_t)insn_hw1 << 16;
9194
9195     if ((insn & 0xf800e800) != 0xf000e800) {
9196         ARCH(6T2);
9197     }
9198
9199     rn = (insn >> 16) & 0xf;
9200     rs = (insn >> 12) & 0xf;
9201     rd = (insn >> 8) & 0xf;
9202     rm = insn & 0xf;
9203     switch ((insn >> 25) & 0xf) {
9204     case 0: case 1: case 2: case 3:
9205         /* 16-bit instructions.  Should never happen.  */
9206         abort();
9207     case 4:
9208         if (insn & (1 << 22)) {
9209             /* Other load/store, table branch.  */
9210             if (insn & 0x01200000) {
9211                 /* Load/store doubleword.  */
9212                 if (rn == 15) {
9213                     addr = tcg_temp_new_i32();
9214                     tcg_gen_movi_i32(addr, s->pc & ~3);
9215                 } else {
9216                     addr = load_reg(s, rn);
9217                 }
9218                 offset = (insn & 0xff) * 4;
9219                 if ((insn & (1 << 23)) == 0)
9220                     offset = -offset;
9221                 if (insn & (1 << 24)) {
9222                     tcg_gen_addi_i32(addr, addr, offset);
9223                     offset = 0;
9224                 }
9225                 if (insn & (1 << 20)) {
9226                     /* ldrd */
9227                     tmp = tcg_temp_new_i32();
9228                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9229                     store_reg(s, rs, tmp);
9230                     tcg_gen_addi_i32(addr, addr, 4);
9231                     tmp = tcg_temp_new_i32();
9232                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9233                     store_reg(s, rd, tmp);
9234                 } else {
9235                     /* strd */
9236                     tmp = load_reg(s, rs);
9237                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9238                     tcg_temp_free_i32(tmp);
9239                     tcg_gen_addi_i32(addr, addr, 4);
9240                     tmp = load_reg(s, rd);
9241                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9242                     tcg_temp_free_i32(tmp);
9243                 }
9244                 if (insn & (1 << 21)) {
9245                     /* Base writeback.  */
9246                     if (rn == 15)
9247                         goto illegal_op;
9248                     tcg_gen_addi_i32(addr, addr, offset - 4);
9249                     store_reg(s, rn, addr);
9250                 } else {
9251                     tcg_temp_free_i32(addr);
9252                 }
9253             } else if ((insn & (1 << 23)) == 0) {
9254                 /* Load/store exclusive word.  */
9255                 addr = tcg_temp_local_new_i32();
9256                 load_reg_var(s, addr, rn);
9257                 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
9258                 if (insn & (1 << 20)) {
9259                     gen_load_exclusive(s, rs, 15, addr, 2);
9260                 } else {
9261                     gen_store_exclusive(s, rd, rs, 15, addr, 2);
9262                 }
9263                 tcg_temp_free_i32(addr);
9264             } else if ((insn & (7 << 5)) == 0) {
9265                 /* Table Branch.  */
9266                 if (rn == 15) {
9267                     addr = tcg_temp_new_i32();
9268                     tcg_gen_movi_i32(addr, s->pc);
9269                 } else {
9270                     addr = load_reg(s, rn);
9271                 }
9272                 tmp = load_reg(s, rm);
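                     /* addr = Rn + Rm indexes a byte table for tbb; tbh adds
                      * Rm a second time to index a table of halfwords.
                      */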
9273                 tcg_gen_add_i32(addr, addr, tmp);
9274                 if (insn & (1 << 4)) {
9275                     /* tbh */
9276                     tcg_gen_add_i32(addr, addr, tmp);
9277                     tcg_temp_free_i32(tmp);
9278                     tmp = tcg_temp_new_i32();
9279                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9280                 } else { /* tbb */
9281                     tcg_temp_free_i32(tmp);
9282                     tmp = tcg_temp_new_i32();
9283                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9284                 }
9285                 tcg_temp_free_i32(addr);
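                     /* Table entries are forward branch offsets in halfwords:
                      * double the entry and add it to the Thumb PC (this
                      * insn's address + 4, i.e. s->pc).
                      */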
9286                 tcg_gen_shli_i32(tmp, tmp, 1);
9287                 tcg_gen_addi_i32(tmp, tmp, s->pc);
9288                 store_reg(s, 15, tmp);
9289             } else {
9290                 int op2 = (insn >> 6) & 0x3;
9291                 op = (insn >> 4) & 0x3;
9292                 switch (op2) {
9293                 case 0:
9294                     goto illegal_op;
9295                 case 1:
9296                     /* Load/store exclusive byte/halfword/doubleword */
9297                     if (op == 2) {
9298                         goto illegal_op;
9299                     }
9300                     ARCH(7);
9301                     break;
9302                 case 2:
9303                     /* Load-acquire/store-release */
9304                     if (op == 3) {
9305                         goto illegal_op;
9306                     }
9307                     /* Fall through */
9308                 case 3:
9309                     /* Load-acquire/store-release exclusive */
9310                     ARCH(8);
9311                     break;
9312                 }
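                 /* op2 bit 0 clear: plain load-acquire/store-release,
                  * handled inline below; op2 bit 0 set: the exclusive
                  * variants, delegated to gen_{load,store}_exclusive.
                  */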
9313                 addr = tcg_temp_local_new_i32();
9314                 load_reg_var(s, addr, rn);
9315                 if (!(op2 & 1)) {
9316                     if (insn & (1 << 20)) {
9317                         tmp = tcg_temp_new_i32();
9318                         switch (op) {
9319                         case 0: /* ldab */
9320                             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9321                             break;
9322                         case 1: /* ldah */
9323                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9324                             break;
9325                         case 2: /* lda */
9326                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9327                             break;
9328                         default:
9329                             abort();
9330                         }
9331                         store_reg(s, rs, tmp);
9332                     } else {
9333                         tmp = load_reg(s, rs);
9334                         switch (op) {
9335                         case 0: /* stlb */
9336                             gen_aa32_st8(tmp, addr, get_mem_index(s));
9337                             break;
9338                         case 1: /* stlh */
9339                             gen_aa32_st16(tmp, addr, get_mem_index(s));
9340                             break;
9341                         case 2: /* stl */
9342                             gen_aa32_st32(tmp, addr, get_mem_index(s));
9343                             break;
9344                         default:
9345                             abort();
9346                         }
9347                         tcg_temp_free_i32(tmp);
9348                     }
9349                 } else if (insn & (1 << 20)) {
9350                     gen_load_exclusive(s, rs, rd, addr, op);
9351                 } else {
9352                     gen_store_exclusive(s, rm, rs, rd, addr, op);
9353                 }
9354                 tcg_temp_free_i32(addr);
9355             }
9356         } else {
9357             /* Load/store multiple, RFE, SRS.  */
9358             if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
9359                 /* RFE, SRS: not available in user mode or on M profile */
9360                 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
9361                     goto illegal_op;
9362                 }
9363                 if (insn & (1 << 20)) {
9364                     /* rfe */
9365                     addr = load_reg(s, rn);
9366                     if ((insn & (1 << 24)) == 0)
9367                         tcg_gen_addi_i32(addr, addr, -8);
9368                     /* Load PC into tmp and CPSR into tmp2.  */
9369                     tmp = tcg_temp_new_i32();
9370                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9371                     tcg_gen_addi_i32(addr, addr, 4);
9372                     tmp2 = tcg_temp_new_i32();
9373                     gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
9374                     if (insn & (1 << 21)) {
9375                         /* Base writeback.  */
9376                         if (insn & (1 << 24)) {
9377                             tcg_gen_addi_i32(addr, addr, 4);
9378                         } else {
9379                             tcg_gen_addi_i32(addr, addr, -4);
9380                         }
9381                         store_reg(s, rn, addr);
9382                     } else {
9383                         tcg_temp_free_i32(addr);
9384                     }
9385                     gen_rfe(s, tmp, tmp2);
9386                 } else {
9387                     /* srs */
9388                     gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
9389                             insn & (1 << 21));
9390                 }
9391             } else {
9392                 int i, loaded_base = 0;
9393                 TCGv_i32 loaded_var;
9394                 /* Load/store multiple.  */
9395                 addr = load_reg(s, rn);
9396                 offset = 0;
9397                 for (i = 0; i < 16; i++) {
9398                     if (insn & (1 << i))
9399                         offset += 4;
9400                 }
9401                 if (insn & (1 << 24)) {
9402                     tcg_gen_addi_i32(addr, addr, -offset);
9403                 }
9404
9405                 TCGV_UNUSED_I32(loaded_var);
9406                 for (i = 0; i < 16; i++) {
9407                     if ((insn & (1 << i)) == 0)
9408                         continue;
9409                     if (insn & (1 << 20)) {
9410                         /* Load.  */
9411                         tmp = tcg_temp_new_i32();
9412                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9413                         if (i == 15) {
9414                             gen_bx(s, tmp);
9415                         } else if (i == rn) {
9416                             loaded_var = tmp;
9417                             loaded_base = 1;
9418                         } else {
9419                             store_reg(s, i, tmp);
9420                         }
9421                     } else {
9422                         /* Store.  */
9423                         tmp = load_reg(s, i);
9424                         gen_aa32_st32(tmp, addr, get_mem_index(s));
9425                         tcg_temp_free_i32(tmp);
9426                     }
9427                     tcg_gen_addi_i32(addr, addr, 4);
9428                 }
9429                 if (loaded_base) {
9430                     store_reg(s, rn, loaded_var);
9431                 }
9432                 if (insn & (1 << 21)) {
9433                     /* Base register writeback.  */
9434                     if (insn & (1 << 24)) {
9435                         tcg_gen_addi_i32(addr, addr, -offset);
9436                     }
9437                     /* Fault if writeback register is in register list.  */
9438                     if (insn & (1 << rn))
9439                         goto illegal_op;
9440                     store_reg(s, rn, addr);
9441                 } else {
9442                     tcg_temp_free_i32(addr);
9443                 }
9444             }
9445         }
9446         break;
9447     case 5:
9448
9449         op = (insn >> 21) & 0xf;
9450         if (op == 6) {
9451             if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9452                 goto illegal_op;
9453             }
9454             /* Halfword pack.  */
9455             tmp = load_reg(s, rn);
9456             tmp2 = load_reg(s, rm);
9457             shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
9458             if (insn & (1 << 5)) {
9459                 /* pkhtb */
9460                 if (shift == 0)
9461                     shift = 31;
9462                 tcg_gen_sari_i32(tmp2, tmp2, shift);
9463                 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9464                 tcg_gen_ext16u_i32(tmp2, tmp2);
9465             } else {
9466                 /* pkhbt */
9467                 if (shift)
9468                     tcg_gen_shli_i32(tmp2, tmp2, shift);
9469                 tcg_gen_ext16u_i32(tmp, tmp);
9470                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
9471             }
9472             tcg_gen_or_i32(tmp, tmp, tmp2);
9473             tcg_temp_free_i32(tmp2);
9474             store_reg(s, rd, tmp);
9475         } else {
9476             /* Data processing register constant shift.  */
9477             if (rn == 15) {
9478                 tmp = tcg_temp_new_i32();
9479                 tcg_gen_movi_i32(tmp, 0);
9480             } else {
9481                 tmp = load_reg(s, rn);
9482             }
9483             tmp2 = load_reg(s, rm);
9484
9485             shiftop = (insn >> 4) & 3;
9486             shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
9487             conds = (insn & (1 << 20)) != 0;
9488             logic_cc = (conds && thumb2_logic_op(op));
9489             gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
9490             if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
9491                 goto illegal_op;
9492             tcg_temp_free_i32(tmp2);
9493             if (rd != 15) {
9494                 store_reg(s, rd, tmp);
9495             } else {
9496                 tcg_temp_free_i32(tmp);
9497             }
9498         }
9499         break;
9500     case 13: /* Misc data processing.  */
9501         op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
9502         if (op < 4 && (insn & 0xf000) != 0xf000)
9503             goto illegal_op;
9504         switch (op) {
9505         case 0: /* Register controlled shift.  */
9506             tmp = load_reg(s, rn);
9507             tmp2 = load_reg(s, rm);
9508             if ((insn & 0x70) != 0)
9509                 goto illegal_op;
9510             op = (insn >> 21) & 3;
9511             logic_cc = (insn & (1 << 20)) != 0;
9512             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
9513             if (logic_cc)
9514                 gen_logic_CC(tmp);
9515             store_reg_bx(s, rd, tmp);
9516             break;
9517         case 1: /* Sign/zero extend.  */
9518             op = (insn >> 20) & 7;
9519             switch (op) {
9520             case 0: /* SXTAH, SXTH */
9521             case 1: /* UXTAH, UXTH */
9522             case 4: /* SXTAB, SXTB */
9523             case 5: /* UXTAB, UXTB */
9524                 break;
9525             case 2: /* SXTAB16, SXTB16 */
9526             case 3: /* UXTAB16, UXTB16 */
9527                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9528                     goto illegal_op;
9529                 }
9530                 break;
9531             default:
9532                 goto illegal_op;
9533             }
9534             if (rn != 15) {
9535                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9536                     goto illegal_op;
9537                 }
9538             }
9539             tmp = load_reg(s, rm);
9540             shift = (insn >> 4) & 3;
9541             /* ??? In many cases it's not necessary to do a
9542                rotate, a shift is sufficient.  */
9543             if (shift != 0)
9544                 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
9545             op = (insn >> 20) & 7;
9546             switch (op) {
9547             case 0: gen_sxth(tmp);   break;
9548             case 1: gen_uxth(tmp);   break;
9549             case 2: gen_sxtb16(tmp); break;
9550             case 3: gen_uxtb16(tmp); break;
9551             case 4: gen_sxtb(tmp);   break;
9552             case 5: gen_uxtb(tmp);   break;
9553             default:
9554                 g_assert_not_reached();
9555             }
9556             if (rn != 15) {
9557                 tmp2 = load_reg(s, rn);
9558                 if ((op >> 1) == 1) {
9559                     gen_add16(tmp, tmp2);
9560                 } else {
9561                     tcg_gen_add_i32(tmp, tmp, tmp2);
9562                     tcg_temp_free_i32(tmp2);
9563                 }
9564             }
9565             store_reg(s, rd, tmp);
9566             break;
9567         case 2: /* SIMD add/subtract.  */
9568             if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9569                 goto illegal_op;
9570             }
9571             op = (insn >> 20) & 7;
9572             shift = (insn >> 4) & 7;
9573             if ((op & 3) == 3 || (shift & 3) == 3)
9574                 goto illegal_op;
9575             tmp = load_reg(s, rn);
9576             tmp2 = load_reg(s, rm);
9577             gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
9578             tcg_temp_free_i32(tmp2);
9579             store_reg(s, rd, tmp);
9580             break;
9581         case 3: /* Other data processing.  */
9582             op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
9583             if (op < 4) {
9584                 /* Saturating add/subtract.  */
9585                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9586                     goto illegal_op;
9587                 }
9588                 tmp = load_reg(s, rn);
9589                 tmp2 = load_reg(s, rm);
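                 /* op bit 0 selects the doubling forms (QDADD/QDSUB), which
                  * saturate 2 * Rn first; op bit 1 selects subtraction.
                  */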
9590                 if (op & 1)
9591                     gen_helper_double_saturate(tmp, cpu_env, tmp);
9592                 if (op & 2)
9593                     gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
9594                 else
9595                     gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
9596                 tcg_temp_free_i32(tmp2);
9597             } else {
9598                 switch (op) {
9599                 case 0x0a: /* rbit */
9600                 case 0x08: /* rev */
9601                 case 0x09: /* rev16 */
9602                 case 0x0b: /* revsh */
9603                 case 0x18: /* clz */
9604                     break;
9605                 case 0x10: /* sel */
9606                     if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9607                         goto illegal_op;
9608                     }
9609                     break;
9610                 case 0x20: /* crc32/crc32c */
9611                 case 0x21:
9612                 case 0x22:
9613                 case 0x28:
9614                 case 0x29:
9615                 case 0x2a:
9616                     if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
9617                         goto illegal_op;
9618                     }
9619                     break;
9620                 default:
9621                     goto illegal_op;
9622                 }
9623                 tmp = load_reg(s, rn);
9624                 switch (op) {
9625                 case 0x0a: /* rbit */
9626                     gen_helper_rbit(tmp, tmp);
9627                     break;
9628                 case 0x08: /* rev */
9629                     tcg_gen_bswap32_i32(tmp, tmp);
9630                     break;
9631                 case 0x09: /* rev16 */
9632                     gen_rev16(tmp);
9633                     break;
9634                 case 0x0b: /* revsh */
9635                     gen_revsh(tmp);
9636                     break;
9637                 case 0x10: /* sel */
9638                     tmp2 = load_reg(s, rm);
9639                     tmp3 = tcg_temp_new_i32();
9640                     tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
9641                     gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
9642                     tcg_temp_free_i32(tmp3);
9643                     tcg_temp_free_i32(tmp2);
9644                     break;
9645                 case 0x18: /* clz */
9646                     gen_helper_clz(tmp, tmp);
9647                     break;
9648                 case 0x20:
9649                 case 0x21:
9650                 case 0x22:
9651                 case 0x28:
9652                 case 0x29:
9653                 case 0x2a:
9654                 {
9655                     /* crc32/crc32c */
9656                     uint32_t sz = op & 0x3;
9657                     uint32_t c = op & 0x8;
9658
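                     /* Narrow the value operand to the element size; the
                      * helper's third argument is the number of bytes
                      * (1 << sz) to feed into the CRC.
                      */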
9659                     tmp2 = load_reg(s, rm);
9660                     if (sz == 0) {
9661                         tcg_gen_andi_i32(tmp2, tmp2, 0xff);
9662                     } else if (sz == 1) {
9663                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
9664                     }
9665                     tmp3 = tcg_const_i32(1 << sz);
9666                     if (c) {
9667                         gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
9668                     } else {
9669                         gen_helper_crc32(tmp, tmp, tmp2, tmp3);
9670                     }
9671                     tcg_temp_free_i32(tmp2);
9672                     tcg_temp_free_i32(tmp3);
9673                     break;
9674                 }
9675                 default:
9676                     g_assert_not_reached();
9677                 }
9678             }
9679             store_reg(s, rd, tmp);
9680             break;
9681         case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
9682             switch ((insn >> 20) & 7) {
9683             case 0: /* 32 x 32 -> 32 */
9684             case 7: /* Unsigned sum of absolute differences.  */
9685                 break;
9686             case 1: /* 16 x 16 -> 32 */
9687             case 2: /* Dual multiply add.  */
9688             case 3: /* 32 * 16 -> 32msb */
9689             case 4: /* Dual multiply subtract.  */
9690             case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
9691                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9692                     goto illegal_op;
9693                 }
9694                 break;
9695             }
9696             op = (insn >> 4) & 0xf;
9697             tmp = load_reg(s, rn);
9698             tmp2 = load_reg(s, rm);
9699             switch ((insn >> 20) & 7) {
9700             case 0: /* 32 x 32 -> 32 */
9701                 tcg_gen_mul_i32(tmp, tmp, tmp2);
9702                 tcg_temp_free_i32(tmp2);
9703                 if (rs != 15) {
9704                     tmp2 = load_reg(s, rs);
9705                     if (op)
9706                         tcg_gen_sub_i32(tmp, tmp2, tmp);
9707                     else
9708                         tcg_gen_add_i32(tmp, tmp, tmp2);
9709                     tcg_temp_free_i32(tmp2);
9710                 }
9711                 break;
9712             case 1: /* 16 x 16 -> 32 */
9713                 gen_mulxy(tmp, tmp2, op & 2, op & 1);
9714                 tcg_temp_free_i32(tmp2);
9715                 if (rs != 15) {
9716                     tmp2 = load_reg(s, rs);
9717                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9718                     tcg_temp_free_i32(tmp2);
9719                 }
9720                 break;
9721             case 2: /* Dual multiply add.  */
9722             case 4: /* Dual multiply subtract.  */
9723                 if (op)
9724                     gen_swap_half(tmp2);
9725                 gen_smul_dual(tmp, tmp2);
9726                 if (insn & (1 << 22)) {
9727                     /* This subtraction cannot overflow. */
9728                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9729                 } else {
9730                     /* This addition cannot overflow 32 bits;
9731                      * however it may overflow considered as a signed
9732                      * operation, in which case we must set the Q flag.
9733                      */
9734                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9735                 }
9736                 tcg_temp_free_i32(tmp2);
9737                 if (rs != 15) {
9738                     tmp2 = load_reg(s, rs);
9739                     gen_helper_add_setq(tmp, cpu_env,
9740                                         tmp, tmp2);
9741                     tcg_temp_free_i32(tmp2);
9742                 }
9743                 break;
9744             case 3: /* 32 * 16 -> 32msb */
9745                 if (op)
9746                     tcg_gen_sari_i32(tmp2, tmp2, 16);
9747                 else
9748                     gen_sxth(tmp2);
9749                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9750                 tcg_gen_shri_i64(tmp64, tmp64, 16);
9751                 tmp = tcg_temp_new_i32();
9752                 tcg_gen_trunc_i64_i32(tmp, tmp64);
9753                 tcg_temp_free_i64(tmp64);
9754                 if (rs != 15) {
9755                     tmp2 = load_reg(s, rs);
9756                     gen_helper_add_setq(tmp, cpu_env,
9757                                         tmp, tmp2);
9758                     tcg_temp_free_i32(tmp2);
9759                 }
9760                 break;
            case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
                tmp64 = gen_muls_i64_i32(tmp, tmp2);
                if (rs != 15) {
                    tmp = load_reg(s, rs);
                    if (insn & (1 << 20)) {
                        tmp64 = gen_addq_msw(tmp64, tmp);
                    } else {
                        tmp64 = gen_subq_msw(tmp64, tmp);
                    }
                }
                if (insn & (1 << 4)) {
                    tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
                }
                tcg_gen_shri_i64(tmp64, tmp64, 32);
                tmp = tcg_temp_new_i32();
                tcg_gen_trunc_i64_i32(tmp, tmp64);
                tcg_temp_free_i64(tmp64);
                break;
            case 7: /* Unsigned sum of absolute differences.  */
                gen_helper_usad8(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp2);
                if (rs != 15) {
                    tmp2 = load_reg(s, rs);
                    tcg_gen_add_i32(tmp, tmp, tmp2);
                    tcg_temp_free_i32(tmp2);
                }
                break;
            }
            store_reg(s, rd, tmp);
            break;
        case 6: case 7: /* 64-bit multiply, Divide.  */
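            /* op[3:0] = insn[7:4], op[6:4] = insn[22:20].  */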
            op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
            tmp = load_reg(s, rn);
            tmp2 = load_reg(s, rm);
            if ((op & 0x50) == 0x10) {
                /* sdiv, udiv */
                if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
                    goto illegal_op;
                }
                if (op & 0x20)
                    gen_helper_udiv(tmp, tmp, tmp2);
                else
                    gen_helper_sdiv(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp2);
                store_reg(s, rd, tmp);
            } else if ((op & 0xe) == 0xc) {
                /* Dual multiply accumulate long.  */
                if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
                    tcg_temp_free_i32(tmp);
                    tcg_temp_free_i32(tmp2);
                    goto illegal_op;
                }
                if (op & 1)
                    gen_swap_half(tmp2);
                gen_smul_dual(tmp, tmp2);
                if (op & 0x10) {
                    tcg_gen_sub_i32(tmp, tmp, tmp2);
                } else {
                    tcg_gen_add_i32(tmp, tmp, tmp2);
                }
                tcg_temp_free_i32(tmp2);
                /* BUGFIX: the 32-bit intermediate result must be
                 * sign-extended to 64 bits before the long accumulation.
                 */
                tmp64 = tcg_temp_new_i64();
                tcg_gen_ext_i32_i64(tmp64, tmp);
                tcg_temp_free_i32(tmp);
                gen_addq(s, tmp64, rs, rd);
                gen_storeq_reg(s, rs, rd, tmp64);
                tcg_temp_free_i64(tmp64);
            } else {
                if (op & 0x20) {
                    /* Unsigned 64-bit multiply  */
                    tmp64 = gen_mulu_i64_i32(tmp, tmp2);
                } else {
                    if (op & 8) {
                        /* smlalxy */
                        if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
                            tcg_temp_free_i32(tmp2);
                            tcg_temp_free_i32(tmp);
                            goto illegal_op;
                        }
                        gen_mulxy(tmp, tmp2, op & 2, op & 1);
                        tcg_temp_free_i32(tmp2);
                        tmp64 = tcg_temp_new_i64();
                        tcg_gen_ext_i32_i64(tmp64, tmp);
                        tcg_temp_free_i32(tmp);
                    } else {
                        /* Signed 64-bit multiply  */
                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
                    }
                }
                if (op & 4) {
                    /* umaal */
                    if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
                        tcg_temp_free_i64(tmp64);
                        goto illegal_op;
                    }
                    gen_addq_lo(s, tmp64, rs);
                    gen_addq_lo(s, tmp64, rd);
                } else if (op & 0x40) {
                    /* 64-bit accumulate.  */
                    gen_addq(s, tmp64, rs, rd);
                }
                gen_storeq_reg(s, rs, rd, tmp64);
                tcg_temp_free_i64(tmp64);
            }
            break;
        }
        break;
    case 6: case 7: case 14: case 15:
        /* Coprocessor.  */
        if (((insn >> 24) & 3) == 3) {
            /* Translate into the equivalent ARM encoding.  */
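            /* (Thumb bit 28, distinguishing the two Neon data-processing
             * spaces, moves to ARM bit 24 and the top nibble is forced
             * to the unconditional 0xf pattern.)
             */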
            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
            if (disas_neon_data_insn(s, insn)) {
                goto illegal_op;
            }
        } else if (((insn >> 8) & 0xe) == 10) {
            if (disas_vfp_insn(s, insn)) {
                goto illegal_op;
            }
        } else {
            if (insn & (1 << 28))
                goto illegal_op;
            if (disas_coproc_insn(s, insn)) {
                goto illegal_op;
            }
        }
        break;
    case 8: case 9: case 10: case 11:
        if (insn & (1 << 15)) {
            /* Branches, misc control.  */
            if (insn & 0x5000) {
                /* Unconditional branch.  */
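                /* The offset is assembled from S:I1:I2:imm10:imm11:'0',
                 * where I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S); the
                 * two XORs below fix up offset bits 23 and 22 after the
                 * plain sign extension.
                 */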
                /* signextend(hw1[10:0]) -> offset[31:12].  */
                offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
                /* hw1[10:0] -> offset[11:1].  */
                offset |= (insn & 0x7ff) << 1;
                /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
                   offset[24:22] already have the same value because of the
                   sign extension above.  */
                offset ^= ((~insn) & (1 << 13)) << 10;
                offset ^= ((~insn) & (1 << 11)) << 11;

                if (insn & (1 << 14)) {
                    /* Branch and link.  */
                    tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
                }

                offset += s->pc;
                if (insn & (1 << 12)) {
                    /* b/bl */
                    gen_jmp(s, offset);
                } else {
                    /* blx */
                    offset &= ~(uint32_t)2;
                    /* thumb2 bx, no need to check */
                    gen_bx_im(s, offset);
                }
            } else if (((insn >> 23) & 7) == 7) {
                /* Misc control */
                if (insn & (1 << 13))
                    goto illegal_op;

                if (insn & (1 << 26)) {
                    if (!(insn & (1 << 20))) {
                        /* Hypervisor call (v7) */
                        int imm16 = extract32(insn, 16, 4) << 12
                            | extract32(insn, 0, 12);
                        ARCH(7);
                        if (IS_USER(s)) {
                            goto illegal_op;
                        }
                        gen_hvc(s, imm16);
                    } else {
                        /* Secure monitor call (v6+) */
                        ARCH(6K);
                        if (IS_USER(s)) {
                            goto illegal_op;
                        }
                        gen_smc(s);
                    }
                } else {
                    op = (insn >> 20) & 7;
                    switch (op) {
                    case 0: /* msr cpsr.  */
                        if (arm_dc_feature(s, ARM_FEATURE_M)) {
                            tmp = load_reg(s, rn);
                            addr = tcg_const_i32(insn & 0xff);
                            gen_helper_v7m_msr(cpu_env, addr, tmp);
                            tcg_temp_free_i32(addr);
                            tcg_temp_free_i32(tmp);
                            gen_lookup_tb(s);
                            break;
                        }
                        /* fall through */
                    case 1: /* msr spsr.  */
                        if (arm_dc_feature(s, ARM_FEATURE_M)) {
                            goto illegal_op;
                        }
                        tmp = load_reg(s, rn);
                        if (gen_set_psr(s,
                              msr_mask(s, (insn >> 8) & 0xf, op == 1),
                              op == 1, tmp))
                            goto illegal_op;
                        break;
                    case 2: /* cps, nop-hint.  */
                        if (((insn >> 8) & 7) == 0) {
                            gen_nop_hint(s, insn & 0xff);
                        }
                        /* Implemented as NOP in user mode.  */
                        if (IS_USER(s))
                            break;
                        offset = 0;
                        imm = 0;
                        if (insn & (1 << 10)) {
                            if (insn & (1 << 7))
                                offset |= CPSR_A;
                            if (insn & (1 << 6))
                                offset |= CPSR_I;
                            if (insn & (1 << 5))
                                offset |= CPSR_F;
                            if (insn & (1 << 9))
                                imm = CPSR_A | CPSR_I | CPSR_F;
                        }
                        if (insn & (1 << 8)) {
                            offset |= 0x1f;
                            imm |= (insn & 0x1f);
                        }
                        if (offset) {
                            gen_set_psr_im(s, offset, 0, imm);
                        }
                        break;
                    case 3: /* Special control operations.  */
                        ARCH(7);
                        op = (insn >> 4) & 0xf;
                        switch (op) {
                        case 2: /* clrex */
                            gen_clrex(s);
                            break;
                        case 4: /* dsb */
                        case 5: /* dmb */
                        case 6: /* isb */
                            /* These execute as NOPs.  */
                            break;
                        default:
                            goto illegal_op;
                        }
                        break;
                    case 4: /* bxj */
                        /* Trivial implementation equivalent to bx.  */
                        tmp = load_reg(s, rn);
                        gen_bx(s, tmp);
                        break;
                    case 5: /* Exception return.  */
                        if (IS_USER(s)) {
                            goto illegal_op;
                        }
                        if (rn != 14 || rd != 15) {
                            goto illegal_op;
                        }
                        tmp = load_reg(s, rn);
                        tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
                        gen_exception_return(s, tmp);
                        break;
                    case 6: /* mrs cpsr.  */
                        tmp = tcg_temp_new_i32();
                        if (arm_dc_feature(s, ARM_FEATURE_M)) {
                            addr = tcg_const_i32(insn & 0xff);
                            gen_helper_v7m_mrs(tmp, cpu_env, addr);
                            tcg_temp_free_i32(addr);
                        } else {
                            gen_helper_cpsr_read(tmp, cpu_env);
                        }
                        store_reg(s, rd, tmp);
                        break;
                    case 7: /* mrs spsr.  */
                        /* Not accessible in user mode.  */
                        if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
                            goto illegal_op;
                        }
                        tmp = load_cpu_field(spsr);
                        store_reg(s, rd, tmp);
                        break;
                    }
                }
            } else {
                /* Conditional branch.  */
                op = (insn >> 22) & 0xf;
                /* Generate a conditional jump to next instruction.  */
                s->condlabel = gen_new_label();
                arm_gen_test_cc(op ^ 1, s->condlabel);
                s->condjmp = 1;

                /* offset[11:1] = insn[10:0] */
                offset = (insn & 0x7ff) << 1;
                /* offset[17:12] = insn[21:16].  */
                offset |= (insn & 0x003f0000) >> 4;
                /* offset[31:20] = insn[26].  */
                offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
                /* offset[18] = insn[13].  */
                offset |= (insn & (1 << 13)) << 5;
                /* offset[19] = insn[11].  */
                offset |= (insn & (1 << 11)) << 8;

                /* jump to the offset */
                gen_jmp(s, s->pc + offset);
            }
        } else {
            /* Data processing immediate.  */
            if (insn & (1 << 25)) {
                if (insn & (1 << 24)) {
                    if (insn & (1 << 20))
                        goto illegal_op;
                    /* Bitfield/Saturate.  */
                    op = (insn >> 21) & 7;
                    imm = insn & 0x1f;
                    shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
                    if (rn == 15) {
                        tmp = tcg_temp_new_i32();
                        tcg_gen_movi_i32(tmp, 0);
                    } else {
                        tmp = load_reg(s, rn);
                    }
                    switch (op) {
                    case 2: /* Signed bitfield extract.  */
                        imm++;
                        if (shift + imm > 32)
                            goto illegal_op;
                        if (imm < 32)
                            gen_sbfx(tmp, shift, imm);
                        break;
                    case 6: /* Unsigned bitfield extract.  */
                        imm++;
                        if (shift + imm > 32)
                            goto illegal_op;
                        if (imm < 32)
                            gen_ubfx(tmp, shift, (1u << imm) - 1);
                        break;
                    case 3: /* Bitfield insert/clear.  */
                        if (imm < shift)
                            goto illegal_op;
                        imm = imm + 1 - shift;
                        if (imm != 32) {
                            tmp2 = load_reg(s, rd);
                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
                            tcg_temp_free_i32(tmp2);
                        }
                        break;
                    case 7:
                        goto illegal_op;
                    default: /* Saturate.  */
                        if (shift) {
                            if (op & 1)
                                tcg_gen_sari_i32(tmp, tmp, shift);
                            else
                                tcg_gen_shli_i32(tmp, tmp, shift);
                        }
                        tmp2 = tcg_const_i32(imm);
                        if (op & 4) {
                            /* Unsigned.  */
                            if ((op & 1) && shift == 0) {
                                if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
                                    tcg_temp_free_i32(tmp);
                                    tcg_temp_free_i32(tmp2);
                                    goto illegal_op;
                                }
                                gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                            } else {
                                gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                            }
                        } else {
                            /* Signed.  */
                            if ((op & 1) && shift == 0) {
                                if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
                                    tcg_temp_free_i32(tmp);
                                    tcg_temp_free_i32(tmp2);
                                    goto illegal_op;
                                }
                                gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                            } else {
                                gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                            }
                        }
                        tcg_temp_free_i32(tmp2);
                        break;
                    }
                    store_reg(s, rd, tmp);
                } else {
                    imm = ((insn & 0x04000000) >> 15)
                          | ((insn & 0x7000) >> 4) | (insn & 0xff);
                    if (insn & (1 << 22)) {
                        /* 16-bit immediate.  */
                        imm |= (insn >> 4) & 0xf000;
                        if (insn & (1 << 23)) {
                            /* movt */
                            tmp = load_reg(s, rd);
                            tcg_gen_ext16u_i32(tmp, tmp);
                            tcg_gen_ori_i32(tmp, tmp, imm << 16);
                        } else {
                            /* movw */
                            tmp = tcg_temp_new_i32();
                            tcg_gen_movi_i32(tmp, imm);
                        }
                    } else {
                        /* Add/sub 12-bit immediate.  */
                        if (rn == 15) {
                            offset = s->pc & ~(uint32_t)3;
                            if (insn & (1 << 23))
                                offset -= imm;
                            else
                                offset += imm;
                            tmp = tcg_temp_new_i32();
                            tcg_gen_movi_i32(tmp, offset);
                        } else {
                            tmp = load_reg(s, rn);
                            if (insn & (1 << 23))
                                tcg_gen_subi_i32(tmp, tmp, imm);
                            else
                                tcg_gen_addi_i32(tmp, tmp, imm);
                        }
                    }
                    store_reg(s, rd, tmp);
                }
            } else {
                int shifter_out = 0;
                /* modified 12-bit immediate.  */
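                /* shift = i:imm3 (insn[26], insn[14:12]); values 0-3
                 * select one of the byte replication patterns below,
                 * larger values give a rotated 8-bit constant.
                 */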
                shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
                imm = (insn & 0xff);
                switch (shift) {
                case 0: /* XY */
                    /* Nothing to do.  */
                    break;
                case 1: /* 00XY00XY */
                    imm |= imm << 16;
                    break;
                case 2: /* XY00XY00 */
                    imm |= imm << 16;
                    imm <<= 8;
                    break;
                case 3: /* XYXYXYXY */
                    imm |= imm << 16;
                    imm |= imm << 8;
                    break;
                default: /* Rotated constant.  */
                    shift = (shift << 1) | (imm >> 7);
                    imm |= 0x80;
                    imm = imm << (32 - shift);
                    shifter_out = 1;
                    break;
                }
                tmp2 = tcg_temp_new_i32();
                tcg_gen_movi_i32(tmp2, imm);
                rn = (insn >> 16) & 0xf;
                if (rn == 15) {
                    tmp = tcg_temp_new_i32();
                    tcg_gen_movi_i32(tmp, 0);
                } else {
                    tmp = load_reg(s, rn);
                }
                op = (insn >> 21) & 0xf;
                if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
                                       shifter_out, tmp, tmp2))
                    goto illegal_op;
                tcg_temp_free_i32(tmp2);
                rd = (insn >> 8) & 0xf;
                if (rd != 15) {
                    store_reg(s, rd, tmp);
                } else {
                    tcg_temp_free_i32(tmp);
                }
            }
        }
        break;
    case 12: /* Load/store single data item.  */
        {
        int postinc = 0;
        int writeback = 0;
        int memidx;
        if ((insn & 0x01100000) == 0x01000000) {
            if (disas_neon_ls_insn(s, insn)) {
                goto illegal_op;
            }
            break;
        }
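        /* op bits [1:0] give the access size (0 = byte, 1 = halfword,
         * 2 = word); bit 2 requests sign extension on load.
         */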
        op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
        if (rs == 15) {
            if (!(insn & (1 << 20))) {
                goto illegal_op;
            }
            if (op != 2) {
                /* Byte or halfword load space with dest == r15 : memory hints.
                 * Catch them early so we don't emit pointless addressing code.
                 * This space is a mix of:
                 *  PLD/PLDW/PLI,  which we implement as NOPs (note that unlike
                 *     the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
                 *     cores)
                 *  unallocated hints, which must be treated as NOPs
                 *  UNPREDICTABLE space, which we NOP or UNDEF depending on
                 *     which is easiest for the decoding logic
                 *  Some space which must UNDEF
                 */
                int op1 = (insn >> 23) & 3;
                int op2 = (insn >> 6) & 0x3f;
                if (op & 2) {
                    goto illegal_op;
                }
                if (rn == 15) {
                    /* UNPREDICTABLE, unallocated hint or
                     * PLD/PLDW/PLI (literal)
                     */
                    return 0;
                }
                if (op1 & 1) {
                    return 0; /* PLD/PLDW/PLI or unallocated hint */
                }
                if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
                    return 0; /* PLD/PLDW/PLI or unallocated hint */
                }
                /* UNDEF space, or an UNPREDICTABLE */
                return 1;
            }
        }
        memidx = get_mem_index(s);
        if (rn == 15) {
            addr = tcg_temp_new_i32();
            /* PC relative.  */
            /* s->pc has already been incremented by 4.  */
            imm = s->pc & 0xfffffffc;
            if (insn & (1 << 23))
                imm += insn & 0xfff;
            else
                imm -= insn & 0xfff;
            tcg_gen_movi_i32(addr, imm);
        } else {
            addr = load_reg(s, rn);
            if (insn & (1 << 23)) {
                /* Positive offset.  */
                imm = insn & 0xfff;
                tcg_gen_addi_i32(addr, addr, imm);
            } else {
                imm = insn & 0xff;
                switch ((insn >> 8) & 0xf) {
                case 0x0: /* Shifted Register.  */
                    shift = (insn >> 4) & 0xf;
                    if (shift > 3) {
                        tcg_temp_free_i32(addr);
                        goto illegal_op;
                    }
                    tmp = load_reg(s, rm);
                    if (shift)
                        tcg_gen_shli_i32(tmp, tmp, shift);
                    tcg_gen_add_i32(addr, addr, tmp);
                    tcg_temp_free_i32(tmp);
                    break;
                case 0xc: /* Negative offset.  */
                    tcg_gen_addi_i32(addr, addr, -imm);
                    break;
                case 0xe: /* User privilege.  */
                    tcg_gen_addi_i32(addr, addr, imm);
                    memidx = get_a32_user_mem_index(s);
                    break;
                case 0x9: /* Post-decrement.  */
                    imm = -imm;
                    /* Fall through.  */
                case 0xb: /* Post-increment.  */
                    postinc = 1;
                    writeback = 1;
                    break;
                case 0xd: /* Pre-decrement.  */
                    imm = -imm;
                    /* Fall through.  */
                case 0xf: /* Pre-increment.  */
                    tcg_gen_addi_i32(addr, addr, imm);
                    writeback = 1;
                    break;
                default:
                    tcg_temp_free_i32(addr);
                    goto illegal_op;
                }
            }
        }
        if (insn & (1 << 20)) {
            /* Load.  */
            tmp = tcg_temp_new_i32();
            switch (op) {
            case 0:
                gen_aa32_ld8u(tmp, addr, memidx);
                break;
            case 4:
                gen_aa32_ld8s(tmp, addr, memidx);
                break;
            case 1:
                gen_aa32_ld16u(tmp, addr, memidx);
                break;
            case 5:
                gen_aa32_ld16s(tmp, addr, memidx);
                break;
            case 2:
                gen_aa32_ld32u(tmp, addr, memidx);
                break;
            default:
                tcg_temp_free_i32(tmp);
                tcg_temp_free_i32(addr);
                goto illegal_op;
            }
            if (rs == 15) {
                gen_bx(s, tmp);
            } else {
                store_reg(s, rs, tmp);
            }
        } else {
            /* Store.  */
            tmp = load_reg(s, rs);
            switch (op) {
            case 0:
                gen_aa32_st8(tmp, addr, memidx);
                break;
            case 1:
                gen_aa32_st16(tmp, addr, memidx);
                break;
            case 2:
                gen_aa32_st32(tmp, addr, memidx);
                break;
            default:
                tcg_temp_free_i32(tmp);
                tcg_temp_free_i32(addr);
                goto illegal_op;
            }
            tcg_temp_free_i32(tmp);
        }
        if (postinc)
            tcg_gen_addi_i32(addr, addr, imm);
        if (writeback) {
            store_reg(s, rn, addr);
        } else {
            tcg_temp_free_i32(addr);
        }
        }
        break;
    default:
        goto illegal_op;
    }
    return 0;
illegal_op:
    return 1;
}

static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t val, insn, op, rm, rn, rd, shift, cond;
    int32_t offset;
    int i;
    TCGv_i32 tmp;
    TCGv_i32 tmp2;
    TCGv_i32 addr;

    if (s->condexec_mask) {
        cond = s->condexec_cond;
        if (cond != 0x0e) {     /* Skip conditional when condition is AL. */
            s->condlabel = gen_new_label();
            arm_gen_test_cc(cond ^ 1, s->condlabel);
            s->condjmp = 1;
        }
    }

    insn = arm_lduw_code(env, s->pc, s->bswap_code);
    s->pc += 2;

    switch (insn >> 12) {
    case 0: case 1:
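        /* Shift by immediate, or (op == 3) add/subtract.  */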
        rd = insn & 7;
        op = (insn >> 11) & 3;
        if (op == 3) {
            /* add/subtract */
            rn = (insn >> 3) & 7;
            tmp = load_reg(s, rn);
            if (insn & (1 << 10)) {
                /* immediate */
                tmp2 = tcg_temp_new_i32();
                tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
            } else {
                /* reg */
                rm = (insn >> 6) & 7;
                tmp2 = load_reg(s, rm);
            }
            if (insn & (1 << 9)) {
                if (s->condexec_mask)
                    tcg_gen_sub_i32(tmp, tmp, tmp2);
                else
                    gen_sub_CC(tmp, tmp, tmp2);
            } else {
                if (s->condexec_mask)
                    tcg_gen_add_i32(tmp, tmp, tmp2);
                else
                    gen_add_CC(tmp, tmp, tmp2);
            }
            tcg_temp_free_i32(tmp2);
            store_reg(s, rd, tmp);
        } else {
            /* shift immediate */
            rm = (insn >> 3) & 7;
            shift = (insn >> 6) & 0x1f;
            tmp = load_reg(s, rm);
            gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            store_reg(s, rd, tmp);
        }
        break;
    case 2: case 3:
        /* arithmetic large immediate */
        op = (insn >> 11) & 3;
        rd = (insn >> 8) & 0x7;
        if (op == 0) { /* mov */
            tmp = tcg_temp_new_i32();
            tcg_gen_movi_i32(tmp, insn & 0xff);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            store_reg(s, rd, tmp);
        } else {
            tmp = load_reg(s, rd);
            tmp2 = tcg_temp_new_i32();
            tcg_gen_movi_i32(tmp2, insn & 0xff);
            switch (op) {
            case 1: /* cmp */
                gen_sub_CC(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp);
                tcg_temp_free_i32(tmp2);
                break;
            case 2: /* add */
                if (s->condexec_mask)
                    tcg_gen_add_i32(tmp, tmp, tmp2);
                else
                    gen_add_CC(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp2);
                store_reg(s, rd, tmp);
                break;
            case 3: /* sub */
                if (s->condexec_mask)
                    tcg_gen_sub_i32(tmp, tmp, tmp2);
                else
                    gen_sub_CC(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp2);
                store_reg(s, rd, tmp);
                break;
            }
        }
        break;
    case 4:
        if (insn & (1 << 11)) {
            rd = (insn >> 8) & 7;
            /* load pc-relative.  Bit 1 of PC is ignored.  */
            val = s->pc + 2 + ((insn & 0xff) * 4);
            val &= ~(uint32_t)2;
            addr = tcg_temp_new_i32();
            tcg_gen_movi_i32(addr, val);
            tmp = tcg_temp_new_i32();
            gen_aa32_ld32u(tmp, addr, get_mem_index(s));
            tcg_temp_free_i32(addr);
            store_reg(s, rd, tmp);
            break;
        }
        if (insn & (1 << 10)) {
            /* data processing extended or blx */
            rd = (insn & 7) | ((insn >> 4) & 8);
            rm = (insn >> 3) & 0xf;
            op = (insn >> 8) & 3;
            switch (op) {
            case 0: /* add */
                tmp = load_reg(s, rd);
                tmp2 = load_reg(s, rm);
                tcg_gen_add_i32(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp2);
                store_reg(s, rd, tmp);
                break;
            case 1: /* cmp */
                tmp = load_reg(s, rd);
                tmp2 = load_reg(s, rm);
                gen_sub_CC(tmp, tmp, tmp2);
                tcg_temp_free_i32(tmp2);
                tcg_temp_free_i32(tmp);
                break;
            case 2: /* mov/cpy */
                tmp = load_reg(s, rm);
                store_reg(s, rd, tmp);
                break;
            case 3: /* branch [and link] exchange thumb register */
                tmp = load_reg(s, rm);
                if (insn & (1 << 7)) {
                    ARCH(5);
                    val = (uint32_t)s->pc | 1;
                    tmp2 = tcg_temp_new_i32();
                    tcg_gen_movi_i32(tmp2, val);
                    store_reg(s, 14, tmp2);
                }
                /* already thumb, no need to check */
                gen_bx(s, tmp);
                break;
            }
            break;
        }

        /* data processing register */
        rd = insn & 7;
        rm = (insn >> 3) & 7;
        op = (insn >> 6) & 0xf;
        if (op == 2 || op == 3 || op == 4 || op == 7) {
            /* the shift/rotate ops want the operands backwards */
            val = rm;
            rm = rd;
            rd = val;
            val = 1;
        } else {
            val = 0;
        }

        if (op == 9) { /* neg */
            tmp = tcg_temp_new_i32();
            tcg_gen_movi_i32(tmp, 0);
        } else if (op != 0xf) { /* mvn doesn't read its first operand */
            tmp = load_reg(s, rd);
        } else {
            TCGV_UNUSED_I32(tmp);
        }
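        /* From here on val doubles as a flag: val == 1 means the result
         * is produced in tmp2 and written back to rm (rd and rm were
         * swapped above for the shift/rotate ops); rd == 16 marks the
         * compare ops, which write no register at all.
         */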

        tmp2 = load_reg(s, rm);
        switch (op) {
        case 0x0: /* and */
            tcg_gen_and_i32(tmp, tmp, tmp2);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            break;
        case 0x1: /* eor */
            tcg_gen_xor_i32(tmp, tmp, tmp2);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            break;
        case 0x2: /* lsl */
            if (s->condexec_mask) {
                gen_shl(tmp2, tmp2, tmp);
            } else {
                gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                gen_logic_CC(tmp2);
            }
            break;
        case 0x3: /* lsr */
            if (s->condexec_mask) {
                gen_shr(tmp2, tmp2, tmp);
            } else {
                gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                gen_logic_CC(tmp2);
            }
            break;
        case 0x4: /* asr */
            if (s->condexec_mask) {
                gen_sar(tmp2, tmp2, tmp);
            } else {
                gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                gen_logic_CC(tmp2);
            }
            break;
        case 0x5: /* adc */
            if (s->condexec_mask) {
                gen_adc(tmp, tmp2);
            } else {
                gen_adc_CC(tmp, tmp, tmp2);
            }
            break;
        case 0x6: /* sbc */
            if (s->condexec_mask) {
                gen_sub_carry(tmp, tmp, tmp2);
            } else {
                gen_sbc_CC(tmp, tmp, tmp2);
            }
            break;
        case 0x7: /* ror */
            if (s->condexec_mask) {
                tcg_gen_andi_i32(tmp, tmp, 0x1f);
                tcg_gen_rotr_i32(tmp2, tmp2, tmp);
            } else {
                gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
                gen_logic_CC(tmp2);
            }
            break;
        case 0x8: /* tst */
            tcg_gen_and_i32(tmp, tmp, tmp2);
            gen_logic_CC(tmp);
            rd = 16;
            break;
        case 0x9: /* neg */
            if (s->condexec_mask)
                tcg_gen_neg_i32(tmp, tmp2);
            else
                gen_sub_CC(tmp, tmp, tmp2);
            break;
        case 0xa: /* cmp */
            gen_sub_CC(tmp, tmp, tmp2);
            rd = 16;
            break;
        case 0xb: /* cmn */
            gen_add_CC(tmp, tmp, tmp2);
            rd = 16;
            break;
        case 0xc: /* orr */
            tcg_gen_or_i32(tmp, tmp, tmp2);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            break;
        case 0xd: /* mul */
            tcg_gen_mul_i32(tmp, tmp, tmp2);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            break;
        case 0xe: /* bic */
            tcg_gen_andc_i32(tmp, tmp, tmp2);
            if (!s->condexec_mask)
                gen_logic_CC(tmp);
            break;
        case 0xf: /* mvn */
            tcg_gen_not_i32(tmp2, tmp2);
            if (!s->condexec_mask)
                gen_logic_CC(tmp2);
            val = 1;
            rm = rd;
            break;
        }
        if (rd != 16) {
            if (val) {
                store_reg(s, rm, tmp2);
                if (op != 0xf)
                    tcg_temp_free_i32(tmp);
            } else {
                store_reg(s, rd, tmp);
                tcg_temp_free_i32(tmp2);
            }
        } else {
            tcg_temp_free_i32(tmp);
            tcg_temp_free_i32(tmp2);
        }
        break;

    case 5:
        /* load/store register offset.  */
        rd = insn & 7;
        rn = (insn >> 3) & 7;
        rm = (insn >> 6) & 7;
        op = (insn >> 9) & 7;
        addr = load_reg(s, rn);
        tmp = load_reg(s, rm);
        tcg_gen_add_i32(addr, addr, tmp);
        tcg_temp_free_i32(tmp);

        if (op < 3) { /* store */
            tmp = load_reg(s, rd);
        } else {
            tmp = tcg_temp_new_i32();
        }

        switch (op) {
        case 0: /* str */
            gen_aa32_st32(tmp, addr, get_mem_index(s));
            break;
        case 1: /* strh */
            gen_aa32_st16(tmp, addr, get_mem_index(s));
            break;
        case 2: /* strb */
            gen_aa32_st8(tmp, addr, get_mem_index(s));
            break;
        case 3: /* ldrsb */
            gen_aa32_ld8s(tmp, addr, get_mem_index(s));
            break;
        case 4: /* ldr */
            gen_aa32_ld32u(tmp, addr, get_mem_index(s));
            break;
        case 5: /* ldrh */
            gen_aa32_ld16u(tmp, addr, get_mem_index(s));
            break;
        case 6: /* ldrb */
            gen_aa32_ld8u(tmp, addr, get_mem_index(s));
            break;
        case 7: /* ldrsh */
            gen_aa32_ld16s(tmp, addr, get_mem_index(s));
            break;
        }
        if (op >= 3) { /* load */
            store_reg(s, rd, tmp);
        } else {
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        break;

    case 6:
        /* load/store word immediate offset */
        rd = insn & 7;
        rn = (insn >> 3) & 7;
        addr = load_reg(s, rn);
        val = (insn >> 4) & 0x7c;
        tcg_gen_addi_i32(addr, addr, val);

        if (insn & (1 << 11)) {
            /* load */
            tmp = tcg_temp_new_i32();
            gen_aa32_ld32u(tmp, addr, get_mem_index(s));
            store_reg(s, rd, tmp);
        } else {
            /* store */
            tmp = load_reg(s, rd);
            gen_aa32_st32(tmp, addr, get_mem_index(s));
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        break;

    case 7:
        /* load/store byte immediate offset */
        rd = insn & 7;
        rn = (insn >> 3) & 7;
        addr = load_reg(s, rn);
        val = (insn >> 6) & 0x1f;
        tcg_gen_addi_i32(addr, addr, val);

        if (insn & (1 << 11)) {
            /* load */
            tmp = tcg_temp_new_i32();
            gen_aa32_ld8u(tmp, addr, get_mem_index(s));
            store_reg(s, rd, tmp);
        } else {
            /* store */
            tmp = load_reg(s, rd);
            gen_aa32_st8(tmp, addr, get_mem_index(s));
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        break;

    case 8:
        /* load/store halfword immediate offset */
        rd = insn & 7;
        rn = (insn >> 3) & 7;
        addr = load_reg(s, rn);
        val = (insn >> 5) & 0x3e;
        tcg_gen_addi_i32(addr, addr, val);

        if (insn & (1 << 11)) {
            /* load */
            tmp = tcg_temp_new_i32();
            gen_aa32_ld16u(tmp, addr, get_mem_index(s));
            store_reg(s, rd, tmp);
        } else {
            /* store */
            tmp = load_reg(s, rd);
            gen_aa32_st16(tmp, addr, get_mem_index(s));
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        break;

    case 9:
        /* load/store from stack */
        rd = (insn >> 8) & 7;
        addr = load_reg(s, 13);
        val = (insn & 0xff) * 4;
        tcg_gen_addi_i32(addr, addr, val);

        if (insn & (1 << 11)) {
            /* load */
            tmp = tcg_temp_new_i32();
            gen_aa32_ld32u(tmp, addr, get_mem_index(s));
            store_reg(s, rd, tmp);
        } else {
            /* store */
            tmp = load_reg(s, rd);
            gen_aa32_st32(tmp, addr, get_mem_index(s));
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        break;

    case 10:
        /* add to high reg */
        rd = (insn >> 8) & 7;
        if (insn & (1 << 11)) {
            /* SP */
            tmp = load_reg(s, 13);
        } else {
            /* PC. bit 1 is ignored.  */
            tmp = tcg_temp_new_i32();
            tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
        }
        val = (insn & 0xff) * 4;
        tcg_gen_addi_i32(tmp, tmp, val);
        store_reg(s, rd, tmp);
        break;

    case 11:
        /* misc */
        op = (insn >> 8) & 0xf;
        switch (op) {
        case 0:
            /* adjust stack pointer */
            tmp = load_reg(s, 13);
            val = (insn & 0x7f) * 4;
            if (insn & (1 << 7))
                val = -(int32_t)val;
            tcg_gen_addi_i32(tmp, tmp, val);
            store_reg(s, 13, tmp);
            break;

        case 2: /* sign/zero extend.  */
            ARCH(6);
            rd = insn & 7;
            rm = (insn >> 3) & 7;
            tmp = load_reg(s, rm);
            switch ((insn >> 6) & 3) {
            case 0: gen_sxth(tmp); break;
            case 1: gen_sxtb(tmp); break;
            case 2: gen_uxth(tmp); break;
            case 3: gen_uxtb(tmp); break;
            }
            store_reg(s, rd, tmp);
            break;
        case 4: case 5: case 0xc: case 0xd:
            /* push/pop */
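            /* Two passes: first total up the register list (plus LR or
             * PC when bit 8 is set) and pre-decrement the address for a
             * push, then transfer the registers ascending and write the
             * new SP back last.
             */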
            addr = load_reg(s, 13);
            if (insn & (1 << 8))
                offset = 4;
            else
                offset = 0;
            for (i = 0; i < 8; i++) {
                if (insn & (1 << i))
                    offset += 4;
            }
            if ((insn & (1 << 11)) == 0) {
                tcg_gen_addi_i32(addr, addr, -offset);
            }
            for (i = 0; i < 8; i++) {
                if (insn & (1 << i)) {
                    if (insn & (1 << 11)) {
                        /* pop */
                        tmp = tcg_temp_new_i32();
                        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                        store_reg(s, i, tmp);
                    } else {
                        /* push */
                        tmp = load_reg(s, i);
                        gen_aa32_st32(tmp, addr, get_mem_index(s));
                        tcg_temp_free_i32(tmp);
                    }
                    /* advance to the next address.  */
                    tcg_gen_addi_i32(addr, addr, 4);
                }
            }
            TCGV_UNUSED_I32(tmp);
            if (insn & (1 << 8)) {
                if (insn & (1 << 11)) {
                    /* pop pc */
                    tmp = tcg_temp_new_i32();
                    gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                    /* don't set the pc until the rest of the instruction
                       has completed */
                } else {
                    /* push lr */
                    tmp = load_reg(s, 14);
                    gen_aa32_st32(tmp, addr, get_mem_index(s));
                    tcg_temp_free_i32(tmp);
                }
                tcg_gen_addi_i32(addr, addr, 4);
            }
            if ((insn & (1 << 11)) == 0) {
                tcg_gen_addi_i32(addr, addr, -offset);
            }
            /* write back the new stack pointer */
            store_reg(s, 13, addr);
            /* set the new PC value */
            if ((insn & 0x0900) == 0x0900) {
                store_reg_from_load(s, 15, tmp);
            }
            break;

        case 1: case 3: case 9: case 11: /* cbz/cbnz */
            rm = insn & 7;
            tmp = load_reg(s, rm);
            s->condlabel = gen_new_label();
            s->condjmp = 1;
            if (insn & (1 << 11))
                tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
            else
                tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
            tcg_temp_free_i32(tmp);
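            /* offset[5:1] = insn[7:3], offset[6] = insn[9]; the branch
             * is always forward.
             */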
            offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
            val = (uint32_t)s->pc + 2;
            val += offset;
            gen_jmp(s, val);
            break;

        case 15: /* IT, nop-hint.  */
            if ((insn & 0xf) == 0) {
                gen_nop_hint(s, (insn >> 4) & 0xf);
                break;
            }
            /* If Then.  */
            s->condexec_cond = (insn >> 4) & 0xe;
            s->condexec_mask = insn & 0x1f;
            /* No actual code generated for this insn, just setup state.  */
            break;

        case 0xe: /* bkpt */
        {
            int imm8 = extract32(insn, 0, 8);
            ARCH(5);
            gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true),
                               default_exception_el(s));
            break;
        }

        case 0xa: /* rev */
            ARCH(6);
            rn = (insn >> 3) & 0x7;
            rd = insn & 0x7;
            tmp = load_reg(s, rn);
            switch ((insn >> 6) & 3) {
            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
            case 1: gen_rev16(tmp); break;
            case 3: gen_revsh(tmp); break;
            default: goto illegal_op;
            }
            store_reg(s, rd, tmp);
            break;

        case 6:
            switch ((insn >> 5) & 7) {
            case 2:
                /* setend */
                ARCH(6);
                if (((insn >> 3) & 1) != s->bswap_code) {
                    /* Dynamic endianness switching not implemented. */
                    qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
                    goto illegal_op;
                }
                break;
            case 3:
                /* cps */
                ARCH(6);
                if (IS_USER(s)) {
                    break;
                }
                if (arm_dc_feature(s, ARM_FEATURE_M)) {
                    tmp = tcg_const_i32((insn & (1 << 4)) != 0);
                    /* FAULTMASK */
                    if (insn & 1) {
                        addr = tcg_const_i32(19);
                        gen_helper_v7m_msr(cpu_env, addr, tmp);
                        tcg_temp_free_i32(addr);
                    }
                    /* PRIMASK */
                    if (insn & 2) {
                        addr = tcg_const_i32(16);
                        gen_helper_v7m_msr(cpu_env, addr, tmp);
                        tcg_temp_free_i32(addr);
                    }
                    tcg_temp_free_i32(tmp);
                    gen_lookup_tb(s);
                } else {
                    if (insn & (1 << 4)) {
                        shift = CPSR_A | CPSR_I | CPSR_F;
                    } else {
                        shift = 0;
                    }
                    gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
                }
                break;
            default:
                goto undef;
            }
            break;

        default:
            goto undef;
        }
        break;

    case 12:
    {
        /* load/store multiple */
        TCGv_i32 loaded_var;
        TCGV_UNUSED_I32(loaded_var);
        rn = (insn >> 8) & 0x7;
        addr = load_reg(s, rn);
        for (i = 0; i < 8; i++) {
            if (insn & (1 << i)) {
                if (insn & (1 << 11)) {
                    /* load */
                    tmp = tcg_temp_new_i32();
                    gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                    if (i == rn) {
                        loaded_var = tmp;
                    } else {
                        store_reg(s, i, tmp);
                    }
                } else {
                    /* store */
                    tmp = load_reg(s, i);
                    gen_aa32_st32(tmp, addr, get_mem_index(s));
                    tcg_temp_free_i32(tmp);
                }
                /* advance to the next address */
                tcg_gen_addi_i32(addr, addr, 4);
            }
        }
        if ((insn & (1 << rn)) == 0) {
            /* base reg not in list: base register writeback */
            store_reg(s, rn, addr);
        } else {
            /* base reg in list: if load, complete it now */
            if (insn & (1 << 11)) {
                store_reg(s, rn, loaded_var);
            }
            tcg_temp_free_i32(addr);
        }
        break;
    }
    case 13:
        /* conditional branch or swi */
        cond = (insn >> 8) & 0xf;
        if (cond == 0xe)
            goto undef;

        if (cond == 0xf) {
            /* swi */
            gen_set_pc_im(s, s->pc);
            s->svc_imm = extract32(insn, 0, 8);
            s->is_jmp = DISAS_SWI;
            break;
        }
        /* generate a conditional jump to next instruction */
        s->condlabel = gen_new_label();
        arm_gen_test_cc(cond ^ 1, s->condlabel);
        s->condjmp = 1;

        /* jump to the offset */
        val = (uint32_t)s->pc + 2;
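        /* Sign-extend the 8-bit immediate: target = pc + 4 + (imm8 << 1). */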
        offset = ((int32_t)insn << 24) >> 24;
        val += offset << 1;
        gen_jmp(s, val);
        break;

    case 14:
        if (insn & (1 << 11)) {
            if (disas_thumb2_insn(env, s, insn))
                goto undef32;
            break;
        }
        /* unconditional branch */
        val = (uint32_t)s->pc;
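        /* Sign-extend the 11-bit immediate: target = pc + 4 + (imm11 << 1). */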
        offset = ((int32_t)insn << 21) >> 21;
        val += (offset << 1) + 2;
        gen_jmp(s, val);
        break;

    case 15:
        if (disas_thumb2_insn(env, s, insn))
            goto undef32;
        break;
    }
    return;
undef32:
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
    return;
illegal_op:
undef:
    gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
   basic block 'tb'. If search_pc is TRUE, also generate PC
   information for each intermediate instruction. */
static inline void gen_intermediate_code_internal(ARMCPU *cpu,
                                                  TranslationBlock *tb,
                                                  bool search_pc)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    int j, lj;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    /* generate intermediate code */

    /* The A64 decoder has its own top level loop, because it doesn't need
     * the A32/T32 complexity to do with conditional execution/IT blocks/etc.
     */
    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
        gen_intermediate_code_internal_a64(cpu, tb, search_pc);
        return;
    }

    pc_start = tb->pc;

    dc->tb = tb;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 0;
    dc->el3_is_aa64 = arm_el_is_aa64(env, 3);
    dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
    dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
    dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
    dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->ns = ARM_TBFLAG_NS(tb->flags);
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
    dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
    dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
    dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
    dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;
11193
11194     /* Single step state. The code-generation logic here is:
11195      *  SS_ACTIVE == 0:
11196      *   generate code with no special handling for single-stepping (except
11197      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11198      *   this happens anyway because those changes are all system register or
11199      *   PSTATE writes).
11200      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11201      *   emit code for one insn
11202      *   emit code to clear PSTATE.SS
11203      *   emit code to generate software step exception for completed step
11204      *   end TB (as usual for having generated an exception)
11205      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11206      *   emit code to generate a software step exception
11207      *   end the TB
11208      */
11209     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11210     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11211     dc->is_ldex = false;
11212     dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
11213
11214     cpu_F0s = tcg_temp_new_i32();
11215     cpu_F1s = tcg_temp_new_i32();
11216     cpu_F0d = tcg_temp_new_i64();
11217     cpu_F1d = tcg_temp_new_i64();
11218     cpu_V0 = cpu_F0d;
11219     cpu_V1 = cpu_F1d;
11220     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
11221     cpu_M0 = tcg_temp_new_i64();
11222     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
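    /* Example (assuming 4K target pages): pc_start = 0x8004 gives
     * next_page_start = 0x9000, so the translation loop below never
     * crosses into the next guest page.
     */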
11223     lj = -1;
11224     num_insns = 0;
11225     max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }
11228
11229     gen_tb_start(tb);
11230
11231     tcg_clear_temp_count();
11232
11233     /* A note on handling of the condexec (IT) bits:
11234      *
11235      * We want to avoid the overhead of having to write the updated condexec
11236      * bits back to the CPUARMState for every instruction in an IT block. So:
11237      * (1) if the condexec bits are not already zero then we write
11238      * zero back into the CPUARMState now. This avoids complications trying
11239      * to do it at the end of the block. (For example if we don't do this
11240      * it's hard to identify whether we can safely skip writing condexec
11241      * at the end of the TB, which we definitely want to do for the case
11242      * where a TB doesn't do anything with the IT state at all.)
11243      * (2) if we are going to leave the TB then we call gen_set_condexec()
11244      * which will write the correct value into CPUARMState if zero is wrong.
11245      * This is done both for leaving the TB at the end, and for leaving
11246      * it because of an exception we know will happen, which is done in
11247      * gen_exception_insn(). The latter is necessary because we need to
11248      * leave the TB with the PC/IT state just prior to execution of the
11249      * instruction which caused the exception.
11250      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11251      * then the CPUARMState will be wrong and we need to reset it.
11252      * This is handled in the same way as restoration of the
11253      * PC in these situations: we will be called again with search_pc=1
11254      * and generate a mapping of the condexec bits for each PC in
11255      * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
11256      * this to restore the condexec bits.
11257      *
11258      * Note that there are no instructions which can read the condexec
11259      * bits, and none which can write non-static values to them, so
11260      * we don't need to care about whether CPUARMState is correct in the
11261      * middle of a TB.
11262      */
11263
    /* Reset the conditional execution bits immediately. This avoids
       complications trying to do it at the end of the block.  */
    if (dc->condexec_mask || dc->condexec_cond) {
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
        store_cpu_field(tmp, condexec_bits);
    }
11272     do {
11273 #ifdef CONFIG_USER_ONLY
11274         /* Intercept jump to the magic kernel page.  */
11275         if (dc->pc >= 0xffff0000) {
11276             /* We always get here via a jump, so know we are not in a
11277                conditional execution block.  */
11278             gen_exception_internal(EXCP_KERNEL_TRAP);
11279             dc->is_jmp = DISAS_UPDATE;
11280             break;
11281         }
11282 #else
11283         if (dc->pc >= 0xfffffff0 && arm_dc_feature(dc, ARM_FEATURE_M)) {
11284             /* We always get here via a jump, so know we are not in a
11285                conditional execution block.  */
11286             gen_exception_internal(EXCP_EXCEPTION_EXIT);
11287             dc->is_jmp = DISAS_UPDATE;
11288             break;
11289         }
11290 #endif
11291
11292         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11293             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11294                 if (bp->pc == dc->pc) {
11295                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11296                     /* Advance PC so that clearing the breakpoint will
11297                        invalidate this TB.  */
11298                     dc->pc += 2;
11299                     goto done_generating;
11300                 }
11301             }
11302         }
11303         if (search_pc) {
11304             j = tcg_op_buf_count();
            if (lj < j) {
                lj++;
                while (lj < j) {
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
                }
            }
11310             tcg_ctx.gen_opc_pc[lj] = dc->pc;
            gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4)
                                        | (dc->condexec_mask >> 1);
11312             tcg_ctx.gen_opc_instr_start[lj] = 1;
11313             tcg_ctx.gen_opc_icount[lj] = num_insns;
11314         }
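        /* The byte packed above mirrors the format the TB flags were
         * derived from: condition in bits [7:4], IT mask in bits [3:0]
         * (the >> 1 undoes the << 1 applied when dc->condexec_mask was
         * initialised), ready to be written straight back into
         * CPUARMState.condexec_bits by restore_state_to_opc().
         */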
11315
        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }
11318
11319         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
11320             tcg_gen_debug_insn_start(dc->pc);
11321         }
11322
11323         if (dc->ss_active && !dc->pstate_ss) {
11324             /* Singlestep state is Active-pending.
11325              * If we're in this state at the start of a TB then either
11326              *  a) we just took an exception to an EL which is being debugged
11327              *     and this is the first insn in the exception handler
11328              *  b) debug exceptions were masked and we just unmasked them
11329              *     without changing EL (eg by clearing PSTATE.D)
11330              * In either case we're going to take a swstep exception in the
11331              * "did not step an insn" case, and so the syndrome ISV and EX
11332              * bits should be zero.
11333              */
11334             assert(num_insns == 0);
11335             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11336                           default_exception_el(dc));
11337             goto done_generating;
11338         }
11339
11340         if (dc->thumb) {
11341             disas_thumb_insn(env, dc);
11342             if (dc->condexec_mask) {
11343                 dc->condexec_cond = (dc->condexec_cond & 0xe)
11344                                    | ((dc->condexec_mask >> 4) & 1);
11345                 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11346                 if (dc->condexec_mask == 0) {
11347                     dc->condexec_cond = 0;
11348                 }
11349             }
11350         } else {
11351             unsigned int insn = arm_ldl_code(env, dc->pc, dc->bswap_code);
11352             dc->pc += 4;
11353             disas_arm_insn(dc, insn);
11354         }
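        /* Illustrative trace of the condexec advance above: each step
         * feeds bit 4 of the old mask into the low bit of the condition
         * (choosing the then/else sense for the next insn) and shifts the
         * 5-bit mask left, e.g. mask 0b11000 -> 0b10000 -> 0b00000; once
         * the mask reaches zero the IT block ends and the condition is
         * cleared.
         */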
11355
11356         if (dc->condjmp && !dc->is_jmp) {
11357             gen_set_label(dc->condlabel);
11358             dc->condjmp = 0;
11359         }
11360
11361         if (tcg_check_temp_count()) {
11362             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11363                     dc->pc);
11364         }
11365
        /* Translation stops when a conditional branch is encountered:
         * otherwise the code that follows it could get translated several
         * times.  Translation also stops when a page boundary is reached,
         * which ensures that prefetch aborts occur at the right place.  */
        num_insns++;
11371     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11372              !cs->singlestep_enabled &&
11373              !singlestep &&
11374              !dc->ss_active &&
11375              dc->pc < next_page_start &&
11376              num_insns < max_insns);
11377
11378     if (tb->cflags & CF_LAST_IO) {
11379         if (dc->condjmp) {
11380             /* FIXME:  This can theoretically happen with self-modifying
11381                code.  */
11382             cpu_abort(cs, "IO on conditional branch instruction");
11383         }
11384         gen_io_end();
11385     }
11386
11387     /* At this stage dc->condjmp will only be set when the skipped
11388        instruction was a conditional branch or trap, and the PC has
11389        already been written.  */
11390     if (unlikely(cs->singlestep_enabled || dc->ss_active)) {
11391         /* Make sure the pc is updated, and raise a debug exception.  */
11392         if (dc->condjmp) {
11393             gen_set_condexec(dc);
11394             if (dc->is_jmp == DISAS_SWI) {
11395                 gen_ss_advance(dc);
11396                 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11397                               default_exception_el(dc));
11398             } else if (dc->is_jmp == DISAS_HVC) {
11399                 gen_ss_advance(dc);
11400                 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11401             } else if (dc->is_jmp == DISAS_SMC) {
11402                 gen_ss_advance(dc);
11403                 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11404             } else if (dc->ss_active) {
11405                 gen_step_complete_exception(dc);
11406             } else {
11407                 gen_exception_internal(EXCP_DEBUG);
11408             }
11409             gen_set_label(dc->condlabel);
11410         }
11411         if (dc->condjmp || !dc->is_jmp) {
11412             gen_set_pc_im(dc, dc->pc);
11413             dc->condjmp = 0;
11414         }
11415         gen_set_condexec(dc);
11416         if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
11417             gen_ss_advance(dc);
11418             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11419                           default_exception_el(dc));
11420         } else if (dc->is_jmp == DISAS_HVC && !dc->condjmp) {
11421             gen_ss_advance(dc);
11422             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11423         } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) {
11424             gen_ss_advance(dc);
11425             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11426         } else if (dc->ss_active) {
11427             gen_step_complete_exception(dc);
11428         } else {
11429             /* FIXME: Single stepping a WFI insn will not halt
11430                the CPU.  */
11431             gen_exception_internal(EXCP_DEBUG);
11432         }
11433     } else {
11434         /* While branches must always occur at the end of an IT block,
11435            there are a few other things that can cause us to terminate
11436            the TB in the middle of an IT block:
11437             - Exception generating instructions (bkpt, swi, undefined).
11438             - Page boundaries.
11439             - Hardware watchpoints.
11440            Hardware breakpoints have already been handled and skip this code.
11441          */
11442         gen_set_condexec(dc);
        switch (dc->is_jmp) {
11444         case DISAS_NEXT:
11445             gen_goto_tb(dc, 1, dc->pc);
11446             break;
11447         default:
11448         case DISAS_JUMP:
11449         case DISAS_UPDATE:
11450             /* indicate that the hash table must be used to find the next TB */
11451             tcg_gen_exit_tb(0);
11452             break;
11453         case DISAS_TB_JUMP:
11454             /* nothing more to generate */
11455             break;
11456         case DISAS_WFI:
11457             gen_helper_wfi(cpu_env);
11458             /* The helper doesn't necessarily throw an exception, but we
11459              * must go back to the main loop to check for interrupts anyway.
11460              */
11461             tcg_gen_exit_tb(0);
11462             break;
11463         case DISAS_WFE:
11464             gen_helper_wfe(cpu_env);
11465             break;
11466         case DISAS_YIELD:
11467             gen_helper_yield(cpu_env);
11468             break;
11469         case DISAS_SWI:
11470             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11471                           default_exception_el(dc));
11472             break;
11473         case DISAS_HVC:
11474             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11475             break;
11476         case DISAS_SMC:
11477             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11478             break;
11479         }
11480         if (dc->condjmp) {
11481             gen_set_label(dc->condlabel);
11482             gen_set_condexec(dc);
11483             gen_goto_tb(dc, 1, dc->pc);
11484             dc->condjmp = 0;
11485         }
11486     }
11487
11488 done_generating:
11489     gen_tb_end(tb, num_insns);
11490
11491 #ifdef DEBUG_DISAS
11492     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11493         qemu_log("----------------\n");
11494         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11495         log_target_disas(cs, pc_start, dc->pc - pc_start,
11496                          dc->thumb | (dc->bswap_code << 1));
11497         qemu_log("\n");
11498     }
11499 #endif
11500     if (search_pc) {
11501         j = tcg_op_buf_count();
11502         lj++;
        while (lj <= j) {
            tcg_ctx.gen_opc_instr_start[lj++] = 0;
        }
11505     } else {
11506         tb->size = dc->pc - pc_start;
11507         tb->icount = num_insns;
11508     }
11509 }
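
/* A minimal sketch (not compiled in) of the conditions that keep the
 * main translation loop above running; "tb_can_continue" is a
 * hypothetical name, and the checks simply restate the while() clause
 * at the end of the loop.
 */
#if 0
static bool tb_can_continue(DisasContext *dc, CPUState *cs,
                            target_ulong next_page_start,
                            int num_insns, int max_insns)
{
    return !dc->is_jmp &&              /* no branch/exception emitted yet */
           !tcg_op_buf_full() &&       /* room left in the TCG op buffer */
           !cs->singlestep_enabled &&  /* gdbstub single-stepping */
           !singlestep &&              /* -singlestep on the command line */
           !dc->ss_active &&           /* architectural single-step */
           dc->pc < next_page_start && /* stay within the guest page */
           num_insns < max_insns;      /* icount budget */
}
#endif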
11510
11511 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
11512 {
11513     gen_intermediate_code_internal(arm_env_get_cpu(env), tb, false);
11514 }
11515
11516 void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb)
11517 {
11518     gen_intermediate_code_internal(arm_env_get_cpu(env), tb, true);
11519 }
11520
11521 static const char *cpu_mode_names[16] = {
11522   "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
11523   "???", "???", "hyp", "und", "???", "???", "???", "sys"
11524 };
11525
11526 void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
11527                         int flags)
11528 {
11529     ARMCPU *cpu = ARM_CPU(cs);
11530     CPUARMState *env = &cpu->env;
11531     int i;
11532     uint32_t psr;
11533
11534     if (is_a64(env)) {
11535         aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
11536         return;
11537     }
11538
    for (i = 0; i < 16; i++) {
        cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }
11546     psr = cpsr_read(env);
11547     cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
11548                 psr,
11549                 psr & (1 << 31) ? 'N' : '-',
11550                 psr & (1 << 30) ? 'Z' : '-',
11551                 psr & (1 << 29) ? 'C' : '-',
11552                 psr & (1 << 28) ? 'V' : '-',
11553                 psr & CPSR_T ? 'T' : 'A',
11554                 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
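    /* Example (illustrative): psr = 0x600001d3 prints
     * "PSR=600001d3 -ZC- A svc32" -- Z and C set, 'A' for ARM (not Thumb)
     * state, mode bits 0x3 indexing "svc", and bit 4 set selecting the
     * 32-bit variant of the mode.
     */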
11555
11556     if (flags & CPU_DUMP_FPU) {
11557         int numvfpregs = 0;
11558         if (arm_feature(env, ARM_FEATURE_VFP)) {
11559             numvfpregs += 16;
11560         }
11561         if (arm_feature(env, ARM_FEATURE_VFP3)) {
11562             numvfpregs += 16;
11563         }
11564         for (i = 0; i < numvfpregs; i++) {
11565             uint64_t v = float64_val(env->vfp.regs[i]);
11566             cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
11567                         i * 2, (uint32_t)v,
11568                         i * 2 + 1, (uint32_t)(v >> 32),
11569                         i, v);
11570         }
11571         cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
11572     }
11573 }
11574
11575 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
11576 {
11577     if (is_a64(env)) {
11578         env->pc = tcg_ctx.gen_opc_pc[pc_pos];
11579         env->condexec_bits = 0;
11580     } else {
11581         env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
11582         env->condexec_bits = gen_opc_condexec_bits[pc_pos];
11583     }
11584 }
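
/* Illustrative use: after a mid-TB fault, the TB is retranslated with
 * search_pc = true, pc_pos identifies the faulting instruction within
 * the retranslated ops, and regs[15] plus condexec_bits are rewound to
 * the values recorded for it.
 */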