These changes are the raw update to qemu-2.6.
[kvmfornfv.git] / qemu / target-i386 / translate.c
index 82e2245..1a1214d 100644 (file)
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include <signal.h>
+#include "qemu/osdep.h"
 
 #include "qemu/host-utils.h"
 #include "cpu.h"
@@ -33,6 +28,7 @@
 #include "exec/helper-gen.h"
 
 #include "trace-tcg.h"
+#include "exec/log.h"
 
 
 #define PREFIX_REPZ   0x01
 # define clztl  clz32
 #endif
 
+/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
+#define CASE_MODRM_MEM_OP(OP) \
+    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
+    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
+    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
+
+#define CASE_MODRM_OP(OP) \
+    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
+    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
+    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
+    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
+
 //#define MACRO_TEST   1
 
 /* global register indexes */
-static TCGv_ptr cpu_env;
+static TCGv_env cpu_env;
 static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv cpu_seg_base[6];
+static TCGv_i64 cpu_bndl[4];
+static TCGv_i64 cpu_bndu[4];
 /* local temps */
-static TCGv cpu_T[2];
+static TCGv cpu_T0, cpu_T1;
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp4;
 static TCGv_ptr cpu_ptr0, cpu_ptr1;
 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;
 
-static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
-
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
@@ -90,6 +99,7 @@ typedef struct DisasContext {
     int prefix;
     TCGMemOp aflag;
     TCGMemOp dflag;
+    target_ulong pc_start;
     target_ulong pc; /* pc = eip + cs_base */
     int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
                    static state change (stop translation) */
@@ -126,6 +136,7 @@ typedef struct DisasContext {
     int cpuid_ext2_features;
     int cpuid_ext3_features;
     int cpuid_7_0_ebx_features;
+    int cpuid_xsave_features;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -310,6 +321,12 @@ static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
     }
 }
 
+/* Select the size of the stack pointer.  */
+static inline TCGMemOp mo_stacksize(DisasContext *s)
+{
+    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
+}
+
 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
 static inline TCGMemOp mo_64_32(TCGMemOp ot)
 {
@@ -372,34 +389,12 @@ static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
     }
 }
 
-static inline void gen_op_movl_A0_reg(int reg)
-{
-    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
-}
-
-static inline void gen_op_addl_A0_im(int32_t val)
-{
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
-#ifdef TARGET_X86_64
-    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
-#endif
-}
-
-#ifdef TARGET_X86_64
-static inline void gen_op_addq_A0_im(int64_t val)
-{
-    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
-}
-#endif
-    
 static void gen_add_A0_im(DisasContext *s, int val)
 {
-#ifdef TARGET_X86_64
-    if (CODE64(s))
-        gen_op_addq_A0_im(val);
-    else
-#endif
-        gen_op_addl_A0_im(val);
+    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
+    if (!CODE64(s)) {
+        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+    }
 }
 
 static inline void gen_op_jmp_v(TCGv dest)
@@ -415,68 +410,10 @@ static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
 
 static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
 {
-    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
+    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
     gen_op_mov_reg_v(size, reg, cpu_tmp0);
 }
 
-static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
-{
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
-    if (shift != 0)
-        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
-    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-    /* For x86_64, this sets the higher half of register to zero.
-       For i386, this is equivalent to a nop. */
-    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
-}
-
-static inline void gen_op_movl_A0_seg(int reg)
-{
-    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
-}
-
-static inline void gen_op_addl_A0_seg(DisasContext *s, int reg)
-{
-    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
-#ifdef TARGET_X86_64
-    if (CODE64(s)) {
-        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
-        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-    } else {
-        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
-    }
-#else
-    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-#endif
-}
-
-#ifdef TARGET_X86_64
-static inline void gen_op_movq_A0_seg(int reg)
-{
-    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base));
-}
-
-static inline void gen_op_addq_A0_seg(int reg)
-{
-    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
-    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-}
-
-static inline void gen_op_movq_A0_reg(int reg)
-{
-    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
-}
-
-static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
-{
-    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
-    if (shift != 0)
-        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
-    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-}
-#endif
-
 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 {
     tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
@@ -490,9 +427,9 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
 {
     if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, cpu_T[0], cpu_A0);
+        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
     } else {
-        gen_op_mov_reg_v(idx, d, cpu_T[0]);
+        gen_op_mov_reg_v(idx, d, cpu_T0);
     }
 }
 
@@ -502,74 +439,77 @@ static inline void gen_jmp_im(target_ulong pc)
     gen_op_jmp_v(cpu_tmp0);
 }
 
-static inline void gen_string_movl_A0_ESI(DisasContext *s)
+/* Compute SEG:REG into A0.  SEG is selected from the override segment
+   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
+   indicate no override.  */
+static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
+                          int def_seg, int ovr_seg)
 {
-    int override;
-
-    override = s->override;
-    switch (s->aflag) {
+    switch (aflag) {
 #ifdef TARGET_X86_64
     case MO_64:
-        if (override >= 0) {
-            gen_op_movq_A0_seg(override);
-            gen_op_addq_A0_reg_sN(0, R_ESI);
-        } else {
-            gen_op_movq_A0_reg(R_ESI);
+        if (ovr_seg < 0) {
+            tcg_gen_mov_tl(cpu_A0, a0);
+            return;
         }
         break;
 #endif
     case MO_32:
         /* 32 bit address */
-        if (s->addseg && override < 0)
-            override = R_DS;
-        if (override >= 0) {
-            gen_op_movl_A0_seg(override);
-            gen_op_addl_A0_reg_sN(0, R_ESI);
-        } else {
-            gen_op_movl_A0_reg(R_ESI);
+        if (ovr_seg < 0) {
+            if (s->addseg) {
+                ovr_seg = def_seg;
+            } else {
+                tcg_gen_ext32u_tl(cpu_A0, a0);
+                return;
+            }
         }
         break;
     case MO_16:
-        /* 16 address, always override */
-        if (override < 0)
-            override = R_DS;
-        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESI]);
-        gen_op_addl_A0_seg(s, override);
+        /* 16 bit address */
+        tcg_gen_ext16u_tl(cpu_A0, a0);
+        a0 = cpu_A0;
+        if (ovr_seg < 0) {
+            if (s->addseg) {
+                ovr_seg = def_seg;
+            } else {
+                return;
+            }
+        }
         break;
     default:
         tcg_abort();
     }
-}
 
-static inline void gen_string_movl_A0_EDI(DisasContext *s)
-{
-    switch (s->aflag) {
-#ifdef TARGET_X86_64
-    case MO_64:
-        gen_op_movq_A0_reg(R_EDI);
-        break;
-#endif
-    case MO_32:
-        if (s->addseg) {
-            gen_op_movl_A0_seg(R_ES);
-            gen_op_addl_A0_reg_sN(0, R_EDI);
+    if (ovr_seg >= 0) {
+        TCGv seg = cpu_seg_base[ovr_seg];
+
+        if (aflag == MO_64) {
+            tcg_gen_add_tl(cpu_A0, a0, seg);
+        } else if (CODE64(s)) {
+            tcg_gen_ext32u_tl(cpu_A0, a0);
+            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
         } else {
-            gen_op_movl_A0_reg(R_EDI);
+            tcg_gen_add_tl(cpu_A0, a0, seg);
+            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
         }
-        break;
-    case MO_16:
-        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_EDI]);
-        gen_op_addl_A0_seg(s, R_ES);
-        break;
-    default:
-        tcg_abort();
     }
 }
 
+static inline void gen_string_movl_A0_ESI(DisasContext *s)
+{
+    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
+}
+
+static inline void gen_string_movl_A0_EDI(DisasContext *s)
+{
+    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
+}
+
 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
 {
-    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
-    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
+    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
+    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
 };
 
 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
@@ -664,15 +604,10 @@ static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
 static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                          uint32_t svm_flags)
 {
-    int state_saved;
     target_ulong next_eip;
 
-    state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
-        gen_update_cc_op(s);
-        gen_jmp_im(cur_eip);
-        state_saved = 1;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
         switch (ot) {
         case MO_8:
             gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
@@ -688,13 +623,11 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
         }
     }
     if(s->flags & HF_SVMI_MASK) {
-        if (!state_saved) {
-            gen_update_cc_op(s);
-            gen_jmp_im(cur_eip);
-        }
+        gen_update_cc_op(s);
+        gen_jmp_im(cur_eip);
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
         gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                 tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
@@ -704,9 +637,9 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -714,31 +647,31 @@ static inline void gen_movs(DisasContext *s, TCGMemOp ot)
 
 static void gen_op_update1_cc(void)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 }
 
 static void gen_op_update2_cc(void)
 {
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 }
 
 static void gen_op_update3_cc(TCGv reg)
 {
     tcg_gen_mov_tl(cpu_cc_src2, reg);
-    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 }
 
 static inline void gen_op_testl_T0_T1_cc(void)
 {
-    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
 }
 
 static void gen_op_update_neg_cc(void)
 {
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
     tcg_gen_movi_tl(cpu_cc_srcT, 0);
 }
 
@@ -1080,11 +1013,11 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
    value 'b'. In the fast case, T0 is guaranted not to be used. */
 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
+    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
 
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
-        cc.reg = cpu_T[0];
+        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
+        cc.reg = cpu_T0;
     }
     if (cc.use_reg2) {
         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
@@ -1098,12 +1031,12 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
    A translation block must end soon.  */
 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
 {
-    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
+    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
 
     gen_update_cc_op(s);
     if (cc.mask != -1) {
-        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
-        cc.reg = cpu_T[0];
+        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
+        cc.reg = cpu_T0;
     }
     set_cc_op(s, CC_OP_DYNAMIC);
     if (cc.use_reg2) {
@@ -1128,9 +1061,9 @@ static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
 
 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 {
-    gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
+    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
     gen_string_movl_A0_EDI(s);
-    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
@@ -1138,8 +1071,8 @@ static inline void gen_stos(DisasContext *s, TCGMemOp ot)
 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
-    gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
+    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
 }
@@ -1147,7 +1080,7 @@ static inline void gen_lods(DisasContext *s, TCGMemOp ot)
 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
     gen_op(s, OP_CMPL, ot, R_EAX);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
@@ -1156,7 +1089,7 @@ static inline void gen_scas(DisasContext *s, TCGMemOp ot)
 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
 {
     gen_string_movl_A0_EDI(s);
-    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
     gen_string_movl_A0_ESI(s);
     gen_op(s, OP_CMPL, ot, OR_TMP0);
     gen_op_movl_T0_Dshift(ot);
@@ -1164,6 +1097,19 @@ static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
     gen_op_add_reg_T0(s->aflag, R_EDI);
 }
 
+static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
+{
+    if (s->flags & HF_IOBPT_MASK) {
+        TCGv_i32 t_size = tcg_const_i32(1 << ot);
+        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
+
+        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
+        tcg_temp_free_i32(t_size);
+        tcg_temp_free(t_next);
+    }
+}
+
+
 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
 {
     if (s->tb->cflags & CF_USE_ICOUNT) {
@@ -1172,14 +1118,15 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot)
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
-    tcg_gen_movi_tl(cpu_T[0], 0);
-    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+    tcg_gen_movi_tl(cpu_T0, 0);
+    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
-    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
+    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (s->tb->cflags & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1191,15 +1138,15 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot)
         gen_io_start();
     }
     gen_string_movl_A0_ESI(s);
-    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
 
     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
     tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
-    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
+    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
     gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
-
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
+    gen_bpt_io(s, cpu_tmp2_i32, ot);
     if (s->tb->cflags & CF_USE_ICOUNT) {
         gen_io_end();
     }
@@ -1309,63 +1256,63 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, cpu_T[0], d);
+        gen_op_mov_v_reg(ot, cpu_T0, d);
     } else {
-        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
     }
     switch(op) {
     case OP_ADCL:
         gen_compute_eflags_c(s1, cpu_tmp4);
-        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update3_cc(cpu_tmp4);
         set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
         gen_compute_eflags_c(s1, cpu_tmp4);
-        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
+        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
+        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update3_cc(cpu_tmp4);
         set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
-        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update2_cc();
         set_cc_op(s1, CC_OP_ADDB + ot);
         break;
     case OP_SUBL:
-        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
-        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update2_cc();
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     default:
     case OP_ANDL:
-        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update1_cc();
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_ORL:
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update1_cc();
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_XORL:
-        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
         gen_op_st_rm_T0_A0(s1, ot, d);
         gen_op_update1_cc();
         set_cc_op(s1, CC_OP_LOGICB + ot);
         break;
     case OP_CMPL:
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
-        tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
+        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
+        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
         set_cc_op(s1, CC_OP_SUBB + ot);
         break;
     }
@@ -1375,20 +1322,20 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
 {
     if (d != OR_TMP0) {
-        gen_op_mov_v_reg(ot, cpu_T[0], d);
+        gen_op_mov_v_reg(ot, cpu_T0, d);
     } else {
-        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
     }
     gen_compute_eflags_c(s1, cpu_cc_src);
     if (c > 0) {
-        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
+        tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
         set_cc_op(s1, CC_OP_INCB + ot);
     } else {
-        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
+        tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
         set_cc_op(s1, CC_OP_DECB + ot);
     }
     gen_op_st_rm_T0_A0(s1, ot, d);
-    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
 }
 
 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
@@ -1443,33 +1390,33 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T[0], op1);
+        gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
-    tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);
+    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
+    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
 
     if (is_right) {
         if (is_arith) {
-            gen_exts(ot, cpu_T[0]);
-            tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
-            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            gen_exts(ot, cpu_T0);
+            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
         } else {
-            gen_extu(ot, cpu_T[0]);
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
-            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            gen_extu(ot, cpu_T0);
+            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
         }
     } else {
-        tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
-        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
+        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
+    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
 }
 
 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
@@ -1479,25 +1426,25 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T[0], op1);
+        gen_op_mov_v_reg(ot, cpu_T0, op1);
 
     op2 &= mask;
     if (op2 != 0) {
         if (is_right) {
             if (is_arith) {
-                gen_exts(ot, cpu_T[0]);
-                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
-                tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
+                gen_exts(ot, cpu_T0);
+                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
+                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
             } else {
-                gen_extu(ot, cpu_T[0]);
-                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
+                gen_extu(ot, cpu_T0);
+                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
+                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
             }
         } else {
-            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
-            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
+            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
+            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
         }
     }
 
@@ -1507,7 +1454,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
     /* update eflags if non zero shift */
     if (op2 != 0) {
         tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
     }
 }
@@ -1519,41 +1466,41 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T[0], op1);
+        gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
 
-    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
 
     switch (ot) {
     case MO_8:
         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
-        tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
-        tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
+        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
         goto do_long;
     case MO_16:
         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
-        tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
+        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
         goto do_long;
     do_long:
 #ifdef TARGET_X86_64
     case MO_32:
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         if (is_right) {
             tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         } else {
             tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
         }
-        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
         break;
 #endif
     default:
         if (is_right) {
-            tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
         } else {
-            tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
         }
         break;
     }
@@ -1569,12 +1516,12 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
        since we've computed the flags into CC_SRC, these variables are
        currently dead.  */
     if (is_right) {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
-        tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
+        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
     } else {
-        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
-        tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
+        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
+        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
     }
     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1585,7 +1532,7 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
        exactly as we computed above.  */
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
+    tcg_gen_trunc_tl_i32(t1, cpu_T1);
     tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); 
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
@@ -1605,9 +1552,9 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T[0], op1);
+        gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
 
     op2 &= mask;
@@ -1615,20 +1562,20 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
             if (is_right) {
                 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
             } else {
                 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
             }
-            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
             break;
 #endif
         default:
             if (is_right) {
-                tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
+                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
             } else {
-                tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
+                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
             }
             break;
         case MO_8:
@@ -1641,10 +1588,10 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
             if (is_right) {
                 shift = mask + 1 - shift;
             }
-            gen_extu(ot, cpu_T[0]);
-            tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
-            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            gen_extu(ot, cpu_T0);
+            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
+            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
+            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
             break;
         }
     }
@@ -1661,12 +1608,12 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
            since we've computed the flags into CC_SRC, these variables are
            currently dead.  */
         if (is_right) {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
-            tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
+            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
         } else {
-            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
-            tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
+            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
+            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
         }
         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
@@ -1683,24 +1630,24 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0)
-        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     else
-        gen_op_mov_v_reg(ot, cpu_T[0], op1);
+        gen_op_mov_v_reg(ot, cpu_T0, op1);
     
     if (is_right) {
         switch (ot) {
         case MO_8:
-            gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
         case MO_16:
-            gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
         case MO_32:
-            gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
 #endif
         default:
@@ -1709,17 +1656,17 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
     } else {
         switch (ot) {
         case MO_8:
-            gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
         case MO_16:
-            gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
         case MO_32:
-            gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
             break;
 #endif
         default:
@@ -1739,9 +1686,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
 
     /* load */
     if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
     } else {
-        gen_op_mov_v_reg(ot, cpu_T[0], op1);
+        gen_op_mov_v_reg(ot, cpu_T0, op1);
     }
 
     count = tcg_temp_new();
@@ -1753,11 +1700,11 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
            portion by constructing it as a 32-bit value.  */
         if (is_right) {
-            tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
-            tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
-            tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
+            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
+            tcg_gen_mov_tl(cpu_T1, cpu_T0);
+            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
         } else {
-            tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
+            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
         }
         /* FALLTHRU */
 #ifdef TARGET_X86_64
@@ -1765,57 +1712,57 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
-            tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
-            tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
+            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
         } else {
-            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
-            tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
-            tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
+            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
+            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
+            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
             tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
-            tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
+            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
         }
         break;
 #endif
     default:
         tcg_gen_subi_tl(cpu_tmp0, count, 1);
         if (is_right) {
-            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
-            tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
+            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
+            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
         } else {
-            tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
             if (ot == MO_16) {
                 /* Only needed if count > 16, for Intel behaviour.  */
                 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
-                tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
+                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
                 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
             }
 
             tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
-            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
-            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
+            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
+            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
         }
         tcg_gen_movi_tl(cpu_tmp4, 0);
-        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
-                           cpu_tmp4, cpu_T[1]);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
+                           cpu_tmp4, cpu_T1);
+        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
         break;
     }
 
     /* store */
     gen_op_st_rm_T0_A0(s, ot, op1);
 
-    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
+    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
     tcg_temp_free(count);
 }
 
 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
 {
     if (s != OR_TMP1)
-        gen_op_mov_v_reg(ot, cpu_T[1], s);
+        gen_op_mov_v_reg(ot, cpu_T1, s);
     switch(op) {
     case OP_ROL:
         gen_rot_rm_T1(s1, ot, d, 0);
@@ -1863,48 +1810,58 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
         break;
     default:
         /* currently not optimized */
-        tcg_gen_movi_tl(cpu_T[1], c);
+        tcg_gen_movi_tl(cpu_T1, c);
         gen_shift(s1, op, ot, d, OR_TMP1);
         break;
     }
 }
 
-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
-{
-    target_long disp;
-    int havesib;
+/* Decompose an address.  */
+
+typedef struct AddressParts {
+    int def_seg;
     int base;
     int index;
     int scale;
-    int mod, rm, code, override, must_add_seg;
-    TCGv sum;
+    target_long disp;
+} AddressParts;
+
+static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
+                                    int modrm)
+{
+    int def_seg, base, index, scale, mod, rm;
+    target_long disp;
+    bool havesib;
+
+    def_seg = R_DS;
+    index = -1;
+    scale = 0;
+    disp = 0;
 
-    override = s->override;
-    must_add_seg = s->addseg;
-    if (override >= 0)
-        must_add_seg = 1;
     mod = (modrm >> 6) & 3;
     rm = modrm & 7;
+    base = rm | REX_B(s);
+
+    if (mod == 3) {
+        /* Normally filtered out earlier, but including this path
+           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
+        goto done;
+    }
 
     switch (s->aflag) {
     case MO_64:
     case MO_32:
         havesib = 0;
-        base = rm;
-        index = -1;
-        scale = 0;
-
-        if (base == 4) {
-            havesib = 1;
-            code = cpu_ldub_code(env, s->pc++);
+        if (rm == 4) {
+            int code = cpu_ldub_code(env, s->pc++);
             scale = (code >> 6) & 3;
             index = ((code >> 3) & 7) | REX_X(s);
             if (index == 4) {
                 index = -1;  /* no index */
             }
-            base = (code & 7);
+            base = (code & 7) | REX_B(s);
+            havesib = 1;
         }
-        base |= REX_B(s);
 
         switch (mod) {
         case 0:
@@ -1913,10 +1870,9 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
                 disp = (int32_t)cpu_ldl_code(env, s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
+                    base = -2;
                     disp += s->pc + s->rip_offset;
                 }
-            } else {
-                disp = 0;
             }
             break;
         case 1:
@@ -1933,204 +1889,132 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
         if (base == R_ESP && s->popl_esp_hack) {
             disp += s->popl_esp_hack;
         }
-
-        /* Compute the address, with a minimum number of TCG ops.  */
-        TCGV_UNUSED(sum);
-        if (index >= 0) {
-            if (scale == 0) {
-                sum = cpu_regs[index];
-            } else {
-                tcg_gen_shli_tl(cpu_A0, cpu_regs[index], scale);
-                sum = cpu_A0;
-            }
-            if (base >= 0) {
-                tcg_gen_add_tl(cpu_A0, sum, cpu_regs[base]);
-                sum = cpu_A0;
-            }
-        } else if (base >= 0) {
-            sum = cpu_regs[base];
-        }
-        if (TCGV_IS_UNUSED(sum)) {
-            tcg_gen_movi_tl(cpu_A0, disp);
-        } else {
-            tcg_gen_addi_tl(cpu_A0, sum, disp);
-        }
-
-        if (must_add_seg) {
-            if (override < 0) {
-                if (base == R_EBP || base == R_ESP) {
-                    override = R_SS;
-                } else {
-                    override = R_DS;
-                }
-            }
-
-            tcg_gen_ld_tl(cpu_tmp0, cpu_env,
-                          offsetof(CPUX86State, segs[override].base));
-            if (CODE64(s)) {
-                if (s->aflag == MO_32) {
-                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
-                }
-                tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-                return;
-            }
-
-            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-        }
-
-        if (s->aflag == MO_32) {
-            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+        if (base == R_EBP || base == R_ESP) {
+            def_seg = R_SS;
         }
         break;
 
     case MO_16:
-        switch (mod) {
-        case 0:
+        if (mod == 0) {
             if (rm == 6) {
+                base = -1;
                 disp = cpu_lduw_code(env, s->pc);
                 s->pc += 2;
-                tcg_gen_movi_tl(cpu_A0, disp);
-                rm = 0; /* avoid SS override */
-                goto no_rm;
-            } else {
-                disp = 0;
+                break;
             }
-            break;
-        case 1:
+        } else if (mod == 1) {
             disp = (int8_t)cpu_ldub_code(env, s->pc++);
-            break;
-        default:
-        case 2:
+        } else {
             disp = (int16_t)cpu_lduw_code(env, s->pc);
             s->pc += 2;
-            break;
         }
 
-        sum = cpu_A0;
         switch (rm) {
         case 0:
-            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_ESI]);
+            base = R_EBX;
+            index = R_ESI;
             break;
         case 1:
-            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_EDI]);
+            base = R_EBX;
+            index = R_EDI;
             break;
         case 2:
-            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_ESI]);
+            base = R_EBP;
+            index = R_ESI;
+            def_seg = R_SS;
             break;
         case 3:
-            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_EDI]);
+            base = R_EBP;
+            index = R_EDI;
+            def_seg = R_SS;
             break;
         case 4:
-            sum = cpu_regs[R_ESI];
+            base = R_ESI;
             break;
         case 5:
-            sum = cpu_regs[R_EDI];
+            base = R_EDI;
             break;
         case 6:
-            sum = cpu_regs[R_EBP];
+            base = R_EBP;
+            def_seg = R_SS;
             break;
         default:
         case 7:
-            sum = cpu_regs[R_EBX];
+            base = R_EBX;
             break;
         }
-        tcg_gen_addi_tl(cpu_A0, sum, disp);
-        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
-    no_rm:
-        if (must_add_seg) {
-            if (override < 0) {
-                if (rm == 2 || rm == 3 || rm == 6) {
-                    override = R_SS;
-                } else {
-                    override = R_DS;
-                }
-            }
-            gen_op_addl_A0_seg(s, override);
-        }
         break;
 
     default:
         tcg_abort();
     }
+
+ done:
+    return (AddressParts){ def_seg, base, index, scale, disp };
 }
 
-static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
+/* Compute the address, with a minimum number of TCG ops.  */
+static TCGv gen_lea_modrm_1(AddressParts a)
 {
-    int mod, rm, base, code;
+    TCGv ea;
 
-    mod = (modrm >> 6) & 3;
-    if (mod == 3)
-        return;
-    rm = modrm & 7;
+    TCGV_UNUSED(ea);
+    if (a.index >= 0) {
+        if (a.scale == 0) {
+            ea = cpu_regs[a.index];
+        } else {
+            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
+            ea = cpu_A0;
+        }
+        if (a.base >= 0) {
+            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
+            ea = cpu_A0;
+        }
+    } else if (a.base >= 0) {
+        ea = cpu_regs[a.base];
+    }
+    if (TCGV_IS_UNUSED(ea)) {
+        tcg_gen_movi_tl(cpu_A0, a.disp);
+        ea = cpu_A0;
+    } else if (a.disp != 0) {
+        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
+        ea = cpu_A0;
+    }
 
-    switch (s->aflag) {
-    case MO_64:
-    case MO_32:
-        base = rm;
+    return ea;
+}
 
-        if (base == 4) {
-            code = cpu_ldub_code(env, s->pc++);
-            base = (code & 7);
-        }
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+{
+    AddressParts a = gen_lea_modrm_0(env, s, modrm);
+    TCGv ea = gen_lea_modrm_1(a);
+    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
+}
 
-        switch (mod) {
-        case 0:
-            if (base == 5) {
-                s->pc += 4;
-            }
-            break;
-        case 1:
-            s->pc++;
-            break;
-        default:
-        case 2:
-            s->pc += 4;
-            break;
-        }
-        break;
+static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
+{
+    (void)gen_lea_modrm_0(env, s, modrm);
+}
 
-    case MO_16:
-        switch (mod) {
-        case 0:
-            if (rm == 6) {
-                s->pc += 2;
-            }
-            break;
-        case 1:
-            s->pc++;
-            break;
-        default:
-        case 2:
-            s->pc += 2;
-            break;
-        }
-        break;
+/* Used for BNDCL, BNDCU, BNDCN.  */
+static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
+                      TCGCond cond, TCGv_i64 bndv)
+{
+    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
 
-    default:
-        tcg_abort();
+    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
+    if (!CODE64(s)) {
+        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
     }
+    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
+    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
+    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
 }
 
 /* used for LEA and MOV AX, mem */
 static void gen_add_A0_ds_seg(DisasContext *s)
 {
-    int override, must_add_seg;
-    must_add_seg = s->addseg;
-    override = R_DS;
-    if (s->override >= 0) {
-        override = s->override;
-        must_add_seg = 1;
-    }
-    if (must_add_seg) {
-#ifdef TARGET_X86_64
-        if (CODE64(s)) {
-            gen_op_addq_A0_seg(override);
-        } else
-#endif
-        {
-            gen_op_addl_A0_seg(s, override);
-        }
-    }
+    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
 }
 
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
@@ -2145,23 +2029,23 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
     if (mod == 3) {
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T[0], reg);
-            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_reg_v(ot, rm, cpu_T0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T[0], rm);
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
         }
     } else {
         gen_lea_modrm(env, s, modrm);
         if (is_store) {
             if (reg != OR_TMP0)
-                gen_op_mov_v_reg(ot, cpu_T[0], reg);
-            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
         } else {
-            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
             if (reg != OR_TMP0)
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
         }
     }
 }
@@ -2258,7 +2142,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 
     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
-    cc = gen_prepare_cc(s, b, cpu_T[1]);
+    cc = gen_prepare_cc(s, b, cpu_T1);
     if (cc.mask != -1) {
         TCGv t0 = tcg_temp_new();
         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
@@ -2268,9 +2152,9 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
         cc.reg2 = tcg_const_tl(cc.imm);
     }
 
-    tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
-                       cpu_T[0], cpu_regs[reg]);
-    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
+                       cpu_T0, cpu_regs[reg]);
+    gen_op_mov_reg_v(ot, reg, cpu_T0);
 
     if (cc.mask != -1) {
         tcg_temp_free(cc.reg);
@@ -2282,29 +2166,24 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 
 static inline void gen_op_movl_T0_seg(int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }
 
 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
 {
-    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-    tcg_gen_st32_tl(cpu_T[0], cpu_env, 
+    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+    tcg_gen_st32_tl(cpu_T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
-    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
-    tcg_gen_st_tl(cpu_T[0], cpu_env, 
-                  offsetof(CPUX86State,segs[seg_reg].base));
+    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
 }
 
 /* move T0 to seg_reg and compute if the CPU state may change. Never
    call this function with seg_reg == R_CS */
-static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
+static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
 {
     if (s->pe && !s->vm86) {
-        /* XXX: optimize by finding processor state dynamically */
-        gen_update_cc_op(s);
-        gen_jmp_im(cur_eip);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
@@ -2345,44 +2224,25 @@ gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
 
 static inline void gen_stack_update(DisasContext *s, int addend)
 {
-#ifdef TARGET_X86_64
-    if (CODE64(s)) {
-        gen_op_add_reg_im(MO_64, R_ESP, addend);
-    } else
-#endif
-    if (s->ss32) {
-        gen_op_add_reg_im(MO_32, R_ESP, addend);
-    } else {
-        gen_op_add_reg_im(MO_16, R_ESP, addend);
-    }
+    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
 }
 
 /* Generate a push. It depends on ss32, addseg and dflag.  */
 static void gen_push_v(DisasContext *s, TCGv val)
 {
-    TCGMemOp a_ot, d_ot = mo_pushpop(s, s->dflag);
+    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
+    TCGMemOp a_ot = mo_stacksize(s);
     int size = 1 << d_ot;
     TCGv new_esp = cpu_A0;
 
     tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
 
-    if (CODE64(s)) {
-        a_ot = MO_64;
-    } else if (s->ss32) {
-        a_ot = MO_32;
+    if (!CODE64(s)) {
         if (s->addseg) {
             new_esp = cpu_tmp4;
             tcg_gen_mov_tl(new_esp, cpu_A0);
-            gen_op_addl_A0_seg(s, R_SS);
-        } else {
-            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
         }
-    } else {
-        a_ot = MO_16;
-        new_esp = cpu_tmp4;
-        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
-        tcg_gen_mov_tl(new_esp, cpu_A0);
-        gen_op_addl_A0_seg(s, R_SS);
+        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
     }
 
     gen_op_st_v(s, d_ot, val, cpu_A0);
@@ -2393,127 +2253,112 @@ static void gen_push_v(DisasContext *s, TCGv val)
 static TCGMemOp gen_pop_T0(DisasContext *s)
 {
     TCGMemOp d_ot = mo_pushpop(s, s->dflag);
-    TCGv addr = cpu_A0;
 
-    if (CODE64(s)) {
-        addr = cpu_regs[R_ESP];
-    } else if (!s->ss32) {
-        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESP]);
-        gen_op_addl_A0_seg(s, R_SS);
-    } else if (s->addseg) {
-        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_ESP]);
-        gen_op_addl_A0_seg(s, R_SS);
-    } else {
-        tcg_gen_ext32u_tl(cpu_A0, cpu_regs[R_ESP]);
-    }
+    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
+    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
 
-    gen_op_ld_v(s, d_ot, cpu_T[0], addr);
     return d_ot;
 }
 
-static void gen_pop_update(DisasContext *s, TCGMemOp ot)
+static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
 {
     gen_stack_update(s, 1 << ot);
 }
 
-static void gen_stack_A0(DisasContext *s)
+static inline void gen_stack_A0(DisasContext *s)
 {
-    gen_op_movl_A0_reg(R_ESP);
-    if (!s->ss32)
-        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
-    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-    if (s->addseg)
-        gen_op_addl_A0_seg(s, R_SS);
+    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
 }
 
-/* NOTE: wrap around in 16 bit not fully handled */
 static void gen_pusha(DisasContext *s)
 {
+    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
+    TCGMemOp d_ot = s->dflag;
+    int size = 1 << d_ot;
     int i;
-    gen_op_movl_A0_reg(R_ESP);
-    gen_op_addl_A0_im(-8 << s->dflag);
-    if (!s->ss32)
-        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
-    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-    if (s->addseg)
-        gen_op_addl_A0_seg(s, R_SS);
-    for(i = 0;i < 8; i++) {
-        gen_op_mov_v_reg(MO_32, cpu_T[0], 7 - i);
-        gen_op_st_v(s, s->dflag, cpu_T[0], cpu_A0);
-        gen_op_addl_A0_im(1 << s->dflag);
+
+    for (i = 0; i < 8; i++) {
+        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
+        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
     }
-    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
+
+    gen_stack_update(s, -8 * size);
 }
 
-/* NOTE: wrap around in 16 bit not fully handled */
 static void gen_popa(DisasContext *s)
 {
+    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
+    TCGMemOp d_ot = s->dflag;
+    int size = 1 << d_ot;
     int i;
-    gen_op_movl_A0_reg(R_ESP);
-    if (!s->ss32)
-        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
-    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 8 << s->dflag);
-    if (s->addseg)
-        gen_op_addl_A0_seg(s, R_SS);
-    for(i = 0;i < 8; i++) {
+
+    for (i = 0; i < 8; i++) {
         /* ESP is not reloaded */
-        if (i != 3) {
-            gen_op_ld_v(s, s->dflag, cpu_T[0], cpu_A0);
-            gen_op_mov_reg_v(s->dflag, 7 - i, cpu_T[0]);
+        if (7 - i == R_ESP) {
+            continue;
         }
-        gen_op_addl_A0_im(1 << s->dflag);
+        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
+        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
+        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
     }
-    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
+
+    gen_stack_update(s, 8 * size);
 }
 
 static void gen_enter(DisasContext *s, int esp_addend, int level)
 {
-    TCGMemOp ot = mo_pushpop(s, s->dflag);
-    int opsize = 1 << ot;
+    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
+    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
+    int size = 1 << d_ot;
 
-    level &= 0x1f;
-#ifdef TARGET_X86_64
-    if (CODE64(s)) {
-        gen_op_movl_A0_reg(R_ESP);
-        gen_op_addq_A0_im(-opsize);
-        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-
-        /* push bp */
-        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
-        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
-        if (level) {
-            /* XXX: must save state */
-            gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
-                                     tcg_const_i32((ot == MO_64)),
-                                     cpu_T[1]);
-        }
-        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
-        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
-        gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[1]);
-    } else
-#endif
-    {
-        gen_op_movl_A0_reg(R_ESP);
-        gen_op_addl_A0_im(-opsize);
-        if (!s->ss32)
-            tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
-        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-        if (s->addseg)
-            gen_op_addl_A0_seg(s, R_SS);
-        /* push bp */
-        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
-        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
-        if (level) {
-            /* XXX: must save state */
-            gen_helper_enter_level(cpu_env, tcg_const_i32(level),
-                                   tcg_const_i32(s->dflag - 1),
-                                   cpu_T[1]);
-        }
-        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
-        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
-        gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
+    /* Push BP; compute FrameTemp into T1.  */
+    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
+    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
+    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
+
+    level &= 31;
+    if (level != 0) {
+        int i;
+
+        /* Copy level-1 pointers from the previous frame.  */
+        for (i = 1; i < level; ++i) {
+            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
+            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
+            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
+
+            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
+            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
+            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
+        }
+
+        /* Push the current FrameTemp as the last level.  */
+        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
+        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
+        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
     }
+
+    /* Copy the FrameTemp value to EBP.  */
+    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
+
+    /* Compute the final value of ESP.  */
+    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
+    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
+}
+
+static void gen_leave(DisasContext *s)
+{
+    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
+    TCGMemOp a_ot = mo_stacksize(s);
+
+    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
+    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
+
+    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
+
+    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
+    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
 }
 
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
@@ -2524,6 +2369,30 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
     s->is_jmp = DISAS_TB_JUMP;
 }
 
+/* Generate #UD for the current instruction.  The assumption here is that
+   the instruction is known, but it isn't allowed in the current cpu mode.  */
+static void gen_illegal_opcode(DisasContext *s)
+{
+    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
+}
+
+/* Similarly, except that the assumption here is that we don't decode
+   the instruction at all -- either a missing opcode, an unimplemented
+   feature, or just a bogus instruction stream.  */
+static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
+{
+    gen_illegal_opcode(s);
+
+    if (qemu_loglevel_mask(LOG_UNIMP)) {
+        target_ulong pc = s->pc_start, end = s->pc;
+        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
+        for (; pc < end; ++pc) {
+            qemu_log(" %02x", cpu_ldub_code(env, pc));
+        }
+        qemu_log("\n");
+    }
+}
+
 /* an interrupt is different from an exception because of the
    privilege checks */
 static void gen_interrupt(DisasContext *s, int intno,
@@ -2544,14 +2413,56 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip)
     s->is_jmp = DISAS_TB_JUMP;
 }
 
-/* generate a generic end of block. Trace exception is also generated
-   if needed */
-static void gen_eob(DisasContext *s)
+static void gen_set_hflag(DisasContext *s, uint32_t mask)
+{
+    if ((s->flags & mask) == 0) {
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
+        tcg_gen_ori_i32(t, t, mask);
+        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
+        tcg_temp_free_i32(t);
+        s->flags |= mask;
+    }
+}
+
+static void gen_reset_hflag(DisasContext *s, uint32_t mask)
+{
+    if (s->flags & mask) {
+        TCGv_i32 t = tcg_temp_new_i32();
+        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
+        tcg_gen_andi_i32(t, t, ~mask);
+        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
+        tcg_temp_free_i32(t);
+        s->flags &= ~mask;
+    }
+}
+
+/* Clear BND registers during legacy branches.  */
+static void gen_bnd_jmp(DisasContext *s)
+{
+    /* Clear the registers only if BND prefix is missing, MPX is enabled,
+       and if the BNDREGs are known to be in use (non-zero) already.
+       The helper itself will check BNDPRESERVE at runtime.  */
+    if ((s->prefix & PREFIX_REPNZ) == 0
+        && (s->flags & HF_MPX_EN_MASK) != 0
+        && (s->flags & HF_MPX_IU_MASK) != 0) {
+        gen_helper_bnd_jmp(cpu_env);
+    }
+}
+
+/* Generate an end of block. Trace exception is also generated if needed.
+   If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
+static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
 {
     gen_update_cc_op(s);
-    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
-        gen_helper_reset_inhibit_irq(cpu_env);
+
+    /* If several instructions disable interrupts, only the first does it.  */
+    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
+        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
+    } else {
+        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
     }
+
     if (s->tb->flags & HF_RF_MASK) {
         gen_helper_reset_rf(cpu_env);
     }
@@ -2565,6 +2476,12 @@ static void gen_eob(DisasContext *s)
     s->is_jmp = DISAS_TB_JUMP;
 }
 
+/* End of block, resetting the inhibit irq flag.  */
+static void gen_eob(DisasContext *s)
+{
+    gen_eob_inhibit_irq(s, false);
+}
+
 /* generate a jump to eip. No segment change must happen before as a
    direct call to the next block may occur */
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
@@ -2601,28 +2518,28 @@ static inline void gen_ldo_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
     tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
     tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
 static inline void gen_sto_env_A0(DisasContext *s, int offset)
 {
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
     tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
     tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
 }
 
 static inline void gen_op_movo(int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1)));
-    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1)));
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
 }
 
 static inline void gen_op_movq(int d_offset, int s_offset)
@@ -2995,7 +2912,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         b1 = 0;
     sse_fn_epp = sse_op_table1[b][b1];
     if (!sse_fn_epp) {
-        goto illegal_op;
+        goto unknown_op;
     }
     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
         is_xmm = 1;
@@ -3014,15 +2931,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     }
     if (s->flags & HF_EM_MASK) {
     illegal_op:
-        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+        gen_illegal_opcode(s);
         return;
     }
-    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
-        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
-            goto illegal_op;
+    if (is_xmm
+        && !(s->flags & HF_OSFXSR_MASK)
+        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
+        goto unknown_op;
+    }
     if (b == 0x0e) {
-        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
-            goto illegal_op;
+        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
+            /* If we were fully decoding this we might use illegal_op.  */
+            goto unknown_op;
+        }
         /* femms */
         gen_helper_emms(cpu_env);
         return;
@@ -3047,8 +2968,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         b |= (b1 << 8);
         switch(b) {
         case 0x0e7: /* movntq */
-            if (mod == 3)
+            if (mod == 3) {
                 goto illegal_op;
+            }
             gen_lea_modrm(env, s, modrm);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
@@ -3073,25 +2995,25 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             gen_lea_modrm(env, s, modrm);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
-                    xmm_regs[reg].XMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
+                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                    xmm_regs[reg].ZMM_L(0)));
+                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
             }
             break;
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
+                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
             }
             break;
@@ -3101,14 +3023,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
+                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
             } else
 #endif
             {
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
             }
             break;
@@ -3142,30 +3064,30 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                tcg_gen_movi_tl(cpu_T[0], 0);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
+                tcg_gen_movi_tl(cpu_T0, 0);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
             }
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
-                tcg_gen_movi_tl(cpu_T[0], 0);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+                                           xmm_regs[reg].ZMM_Q(0)));
+                tcg_gen_movi_tl(cpu_T0, 0);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
         case 0x012: /* movlps */
@@ -3173,12 +3095,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
                 /* movhlps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
             }
             break;
         case 0x212: /* movsldup */
@@ -3187,40 +3109,40 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             break;
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(1)));
+                                           xmm_regs[reg].ZMM_Q(1)));
             } else {
                 /* movlhps */
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
             break;
         case 0x216: /* movshdup */
@@ -3229,15 +3151,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
             }
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
-            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
+                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
+                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
             break;
         case 0x178:
         case 0x378:
@@ -3263,13 +3185,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
+                tcg_gen_ld_i64(cpu_T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
+                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
@@ -3277,14 +3199,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
-                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                tcg_gen_ld_i64(cpu_T0, cpu_env,
+                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
-                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
             }
             break;
@@ -3292,13 +3214,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
-                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             }
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
@@ -3328,23 +3250,23 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
+                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
+                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
-                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
             }
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
-                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
             }
             break;
         case 0x013: /* movlps */
@@ -3352,7 +3274,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
                 goto illegal_op;
             }
@@ -3362,7 +3284,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(1)));
+                                           xmm_regs[reg].ZMM_Q(1)));
             } else {
                 goto illegal_op;
             }
@@ -3374,26 +3296,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x172:
         case 0x173:
             if (b1 >= 2) {
-               goto illegal_op;
+               goto unknown_op;
             }
             val = cpu_ldub_code(env, s->pc++);
             if (is_xmm) {
-                tcg_gen_movi_tl(cpu_T[0], val);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
-                tcg_gen_movi_tl(cpu_T[0], 0);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
+                tcg_gen_movi_tl(cpu_T0, val);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
+                tcg_gen_movi_tl(cpu_T0, 0);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
                 op1_offset = offsetof(CPUX86State,xmm_t0);
             } else {
-                tcg_gen_movi_tl(cpu_T[0], val);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
-                tcg_gen_movi_tl(cpu_T[0], 0);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
+                tcg_gen_movi_tl(cpu_T0, val);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
+                tcg_gen_movi_tl(cpu_T0, 0);
+                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
                                        (((modrm >> 3)) & 7)][b1];
             if (!sse_fn_epp) {
-                goto illegal_op;
+                goto unknown_op;
             }
             if (is_xmm) {
                 rm = (modrm & 7) | REX_B(s);
@@ -3452,12 +3374,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
-                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T[0]);
+                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
 #else
                 goto illegal_op;
 #endif
@@ -3502,10 +3424,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 if ((b >> 8) & 1) {
-                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
+                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                 } else {
-                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
-                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                 }
                 op2_offset = offsetof(CPUX86State,xmm_t0);
             } else {
@@ -3517,17 +3439,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 SSEFunc_i_ep sse_fn_i_ep =
                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                 sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
-                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
             } else {
 #ifdef TARGET_X86_64
                 SSEFunc_l_ep sse_fn_l_ep =
                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
-                sse_fn_l_ep(cpu_T[0], cpu_env, cpu_ptr0);
+                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
 #else
                 goto illegal_op;
 #endif
             }
-            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+            gen_op_mov_reg_v(ot, reg, cpu_T0);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
@@ -3536,11 +3458,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
-                tcg_gen_st16_tl(cpu_T[0], cpu_env,
-                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
+                tcg_gen_st16_tl(cpu_T0, cpu_env,
+                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
             } else {
                 val &= 3;
-                tcg_gen_st16_tl(cpu_T[0], cpu_env,
+                tcg_gen_st16_tl(cpu_T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
             }
             break;
@@ -3553,41 +3475,41 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
-                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
-                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
+                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
+                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
             } else {
                 val &= 3;
                 rm = (modrm & 7);
-                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
             }
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+            gen_op_mov_reg_v(ot, reg, cpu_T0);
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
                 gen_lea_modrm(env, s, modrm);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
-                                           xmm_regs[reg].XMM_Q(0)));
+                                           xmm_regs[reg].ZMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
-                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
-                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
+                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
             }
             break;
         case 0x2d6: /* movq2dq */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
                         offsetof(CPUX86State,fpregs[rm].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
-                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
         case 0x1d7:
@@ -3617,12 +3539,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             if (b1 >= 2) {
-                goto illegal_op;
+                goto unknown_op;
             }
 
             sse_fn_epp = sse_op_table6[b].op[b1];
             if (!sse_fn_epp) {
-                goto illegal_op;
+                goto unknown_op;
             }
             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                 goto illegal_op;
@@ -3639,20 +3561,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                         gen_ldq_env_A0(s, op2_offset +
-                                        offsetof(XMMReg, XMM_Q(0)));
+                                        offsetof(ZMMReg, ZMM_Q(0)));
                         break;
                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                         tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                             s->mem_index, MO_LEUL);
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
-                                        offsetof(XMMReg, XMM_L(0)));
+                                        offsetof(ZMMReg, ZMM_L(0)));
                         break;
                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                         tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
                                            s->mem_index, MO_LEUW);
                         tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
-                                        offsetof(XMMReg, XMM_W(0)));
+                                        offsetof(ZMMReg, ZMM_W(0)));
                         break;
                     case 0x2a:            /* movntqda */
                         gen_ldo_env_A0(s, op1_offset);
@@ -3672,7 +3594,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
             }
             if (sse_fn_epp == SSE_SPECIAL) {
-                goto illegal_op;
+                goto unknown_op;
             }
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -3709,11 +3631,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
-                                 cpu_T[0], tcg_const_i32(8 << ot));
+                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
+                                 cpu_T0, tcg_const_i32(8 << ot));
 
                 ot = mo_64_32(s->dflag);
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
                 break;
 
             case 0x1f0: /* crc32 or movbe */
@@ -3738,9 +3660,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 gen_lea_modrm(env, s, modrm);
                 if ((b & 1) == 0) {
-                    tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
+                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
                                        s->mem_index, ot | MO_BE);
-                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                    gen_op_mov_reg_v(ot, reg, cpu_T0);
                 } else {
                     tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3755,8 +3677,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_LOGICB + ot);
                 break;
@@ -3775,12 +3697,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
-                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0);
+                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
 
                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     zero = tcg_const_tl(0);
-                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound,
-                                       cpu_T[0], zero);
+                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
+                                       cpu_T0, zero);
                     tcg_temp_free(zero);
 
                     /* Extract the LEN into a mask.  Lengths larger than
@@ -3790,12 +3712,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
                                        cpu_A0, bound);
                     tcg_temp_free(bound);
-                    tcg_gen_movi_tl(cpu_T[1], 1);
-                    tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0);
-                    tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1);
-                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                    tcg_gen_movi_tl(cpu_T1, 1);
+                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
 
-                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                    gen_op_mov_reg_v(ot, reg, cpu_T0);
                     gen_op_update1_cc();
                     set_cc_op(s, CC_OP_LOGICB + ot);
                 }
@@ -3809,21 +3731,21 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                     /* Note that since we're using BMILG (in order to get O
                        cleared) we need to store the inverse into C.  */
                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
-                                       cpu_T[1], bound);
-                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1],
-                                       bound, bound, cpu_T[1]);
+                                       cpu_T1, bound);
+                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
+                                       bound, bound, cpu_T1);
                     tcg_temp_free(bound);
                 }
                 tcg_gen_movi_tl(cpu_A0, -1);
-                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]);
-                tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0);
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
+                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
@@ -3838,7 +3760,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 switch (ot) {
                 default:
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                     tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
                     tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                       cpu_tmp2_i32, cpu_tmp3_i32);
@@ -3847,8 +3769,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
 #ifdef TARGET_X86_64
                 case MO_64:
-                    tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
-                                      cpu_T[0], cpu_regs[R_EDX]);
+                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
+                                      cpu_T0, cpu_regs[R_EDX]);
+                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
+                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
                     break;
 #endif
                 }
@@ -3865,11 +3789,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pdep(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                gen_helper_pdep(cpu_regs[reg], cpu_T0, cpu_T1);
                 break;
 
             case 0x2f5: /* pext Gy, By, Ey */
@@ -3883,11 +3807,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
-                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                    tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
                 } else {
-                    tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
+                    tcg_gen_ext32u_tl(cpu_T1, cpu_regs[s->vex_v]);
                 }
-                gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+                gen_helper_pext(cpu_regs[reg], cpu_T0, cpu_T1);
                 break;
 
             case 0x1f6: /* adcx Gy, Ey */
@@ -3946,22 +3870,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         /* If we know TL is 64-bit, and we want a 32-bit
                            result, just do everything in 64-bit arithmetic.  */
                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
-                        tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
-                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
-                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
-                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
-                        tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
+                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
+                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
+                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
+                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
+                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
                         break;
 #endif
                     default:
                         /* Otherwise compute the carry-out in two steps.  */
                         zero = tcg_const_tl(0);
-                        tcg_gen_add2_tl(cpu_T[0], carry_out,
-                                        cpu_T[0], zero,
+                        tcg_gen_add2_tl(cpu_T0, carry_out,
+                                        cpu_T0, zero,
                                         carry_in, zero);
                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                         cpu_regs[reg], carry_out,
-                                        cpu_T[0], zero);
+                                        cpu_T0, zero);
                         tcg_temp_free(zero);
                         break;
                     }
@@ -3980,24 +3904,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 if (ot == MO_64) {
-                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
+                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
                 } else {
-                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31);
+                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
                 }
                 if (b == 0x1f7) {
-                    tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
                 } else if (b == 0x2f7) {
                     if (ot != MO_64) {
-                        tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
                     }
-                    tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
                 } else {
                     if (ot != MO_64) {
-                        tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
                     }
-                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
                 break;
 
             case 0x0f3:
@@ -4014,36 +3938,36 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_neg_tl(cpu_T[1], cpu_T[0]);
-                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
+                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
                     gen_op_update2_cc();
                     set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
 
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
-                    tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
+                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                     set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
 
                 case 3: /* blsi By, Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
-                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                     set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
 
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
 
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
             break;
 
@@ -4055,12 +3979,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             if (b1 >= 2) {
-                goto illegal_op;
+                goto unknown_op;
             }
 
             sse_fn_eppi = sse_op_table7[b].op[b1];
             if (!sse_fn_eppi) {
-                goto illegal_op;
+                goto unknown_op;
             }
             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                 goto illegal_op;
@@ -4074,22 +3998,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
                 case 0x14: /* pextrb */
-                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
-                                            xmm_regs[reg].XMM_B(val & 15)));
+                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].ZMM_B(val & 15)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                        gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
                                            s->mem_index, MO_UB);
                     }
                     break;
                 case 0x15: /* pextrw */
-                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
-                                            xmm_regs[reg].XMM_W(val & 7)));
+                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].ZMM_W(val & 7)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                        gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
                                            s->mem_index, MO_LEUW);
                     }
                     break;
@@ -4097,7 +4021,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     if (ot == MO_32) { /* pextrd */
                         tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_L(val & 3)));
+                                                xmm_regs[reg].ZMM_L(val & 3)));
                         if (mod == 3) {
                             tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
                         } else {
@@ -4108,7 +4032,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 #ifdef TARGET_X86_64
                         tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_Q(val & 1)));
+                                                xmm_regs[reg].ZMM_Q(val & 1)));
                         if (mod == 3) {
                             tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
                         } else {
@@ -4121,53 +4045,53 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     }
                     break;
                 case 0x17: /* extractps */
-                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
-                                            xmm_regs[reg].XMM_L(val & 3)));
+                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].ZMM_L(val & 3)));
                     if (mod == 3) {
-                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                        gen_op_mov_reg_v(ot, rm, cpu_T0);
                     } else {
-                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
+                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
                                            s->mem_index, MO_LEUL);
                     }
                     break;
                 case 0x20: /* pinsrb */
                     if (mod == 3) {
-                        gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
+                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
                     } else {
-                        tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
+                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
                                            s->mem_index, MO_UB);
                     }
-                    tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
-                                            xmm_regs[reg].XMM_B(val & 15)));
+                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
+                                            xmm_regs[reg].ZMM_B(val & 15)));
                     break;
                 case 0x21: /* insertps */
                     if (mod == 3) {
                         tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,xmm_regs[rm]
-                                                .XMM_L((val >> 6) & 3)));
+                                                .ZMM_L((val >> 6) & 3)));
                     } else {
                         tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                             s->mem_index, MO_LEUL);
                     }
                     tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                     offsetof(CPUX86State,xmm_regs[reg]
-                                            .XMM_L((val >> 4) & 3)));
+                                            .ZMM_L((val >> 4) & 3)));
                     if ((val >> 0) & 1)
                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                         cpu_env, offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_L(0)));
+                                                xmm_regs[reg].ZMM_L(0)));
                     if ((val >> 1) & 1)
                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                         cpu_env, offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_L(1)));
+                                                xmm_regs[reg].ZMM_L(1)));
                     if ((val >> 2) & 1)
                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                         cpu_env, offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_L(2)));
+                                                xmm_regs[reg].ZMM_L(2)));
                     if ((val >> 3) & 1)
                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
                                         cpu_env, offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_L(3)));
+                                                xmm_regs[reg].ZMM_L(3)));
                     break;
                 case 0x22:
                     if (ot == MO_32) { /* pinsrd */
@@ -4179,7 +4103,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         }
                         tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
                                         offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_L(val & 3)));
+                                                xmm_regs[reg].ZMM_L(val & 3)));
                     } else { /* pinsrq */
 #ifdef TARGET_X86_64
                         if (mod == 3) {
@@ -4190,7 +4114,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                         }
                         tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
                                         offsetof(CPUX86State,
-                                                xmm_regs[reg].XMM_Q(val & 1)));
+                                                xmm_regs[reg].ZMM_Q(val & 1)));
 #else
                         goto illegal_op;
 #endif
@@ -4252,22 +4176,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
-                    tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
+                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
                 } else {
-                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                     tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
-                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
                 }
-                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(ot, reg, cpu_T0);
                 break;
 
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
             break;
 
         default:
-            goto illegal_op;
+        unknown_op:
+            gen_unknown_opcode(env, s);
+            return;
         }
     } else {
         /* generic MMX or SSE operation */
@@ -4313,13 +4239,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 switch (sz) {
                 case 2:
                     /* 32 bit access */
-                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
-                    tcg_gen_st32_tl(cpu_T[0], cpu_env,
-                                    offsetof(CPUX86State,xmm_t0.XMM_L(0)));
+                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
+                    tcg_gen_st32_tl(cpu_T0, cpu_env,
+                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
                     break;
                 case 3:
                     /* 64 bit access */
-                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_D(0)));
+                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
                     break;
                 default:
                     /* 128 bit access */
@@ -4343,11 +4269,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         switch(b) {
         case 0x0f: /* 3DNow! data insns */
-            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
-                goto illegal_op;
             val = cpu_ldub_code(env, s->pc++);
             sse_fn_epp = sse_op_table5[val];
             if (!sse_fn_epp) {
+                goto unknown_op;
+            }
+            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
                 goto illegal_op;
             }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4367,7 +4294,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             /* compare insns */
             val = cpu_ldub_code(env, s->pc++);
             if (val >= 8)
-                goto illegal_op;
+                goto unknown_op;
             sse_fn_epp = sse_op_table4[val][b1];
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4412,10 +4339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
-        tcg_gen_debug_insn_start(pc_start);
-    }
-    s->pc = pc_start;
+    s->pc_start = s->pc = pc_start;
     prefixes = 0;
     s->override = -1;
     rex_w = -1;
@@ -4528,7 +4452,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     b = 0x13a;
                     break;
                 default:   /* Reserved for future use.  */
-                    goto illegal_op;
+                    goto unknown_op;
                 }
             }
             s->vex_v = (~vex3 >> 3) & 0xf;
@@ -4608,13 +4532,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 xor_zero:
                     /* xor reg, reg optimisation */
                     set_cc_op(s, CC_OP_CLR);
-                    tcg_gen_movi_tl(cpu_T[0], 0);
-                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+                    tcg_gen_movi_tl(cpu_T0, 0);
+                    gen_op_mov_reg_v(ot, reg, cpu_T0);
                     break;
                 } else {
                     opreg = rm;
                 }
-                gen_op_mov_v_reg(ot, cpu_T[1], reg);
+                gen_op_mov_v_reg(ot, cpu_T1, reg);
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
@@ -4624,17 +4548,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
                     gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
                 } else {
-                    gen_op_mov_v_reg(ot, cpu_T[1], rm);
+                    gen_op_mov_v_reg(ot, cpu_T1, rm);
                 }
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
                 val = insn_get(env, s, ot);
-                tcg_gen_movi_tl(cpu_T[1], val);
+                tcg_gen_movi_tl(cpu_T1, val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
             }
@@ -4679,7 +4603,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 val = (int8_t)insn_get(env, s, MO_8);
                 break;
             }
-            tcg_gen_movi_tl(cpu_T[1], val);
+            tcg_gen_movi_tl(cpu_T1, val);
             gen_op(s, op, ot, opreg);
         }
         break;
@@ -4706,32 +4630,32 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T[0], rm);
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
 
         switch(op) {
         case 0: /* test */
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T[1], val);
+            tcg_gen_movi_tl(cpu_T1, val);
             gen_op_testl_T0_T1_cc();
             set_cc_op(s, CC_OP_LOGICB + ot);
             break;
         case 2: /* not */
-            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_not_tl(cpu_T0, cpu_T0);
             if (mod != 3) {
-                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
             } else {
-                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                gen_op_mov_reg_v(ot, rm, cpu_T0);
             }
             break;
         case 3: /* neg */
-            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_neg_tl(cpu_T0, cpu_T0);
             if (mod != 3) {
-                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
             } else {
-                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                gen_op_mov_reg_v(ot, rm, cpu_T0);
             }
             gen_op_update_neg_cc();
             set_cc_op(s, CC_OP_SUBB + ot);
@@ -4739,32 +4663,32 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 4: /* mul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
-                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
+                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
+                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
-                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
+                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
-                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
+                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                   cpu_tmp2_i32, cpu_tmp3_i32);
@@ -4777,7 +4701,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T[0], cpu_regs[R_EAX]);
+                                  cpu_T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                 set_cc_op(s, CC_OP_MULQ);
@@ -4788,34 +4712,34 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 5: /* imul */
             switch(ot) {
             case MO_8:
-                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
-                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
+                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
                 set_cc_op(s, CC_OP_MULB);
                 break;
             case MO_16:
-                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
-                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
-                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
+                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
                 /* XXX: use 32 bit mul which could be faster */
-                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
-                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
-                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
-                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
+                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
+                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
+                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
                 set_cc_op(s, CC_OP_MULW);
                 break;
             default:
             case MO_32:
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                 tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                   cpu_tmp2_i32, cpu_tmp3_i32);
@@ -4830,7 +4754,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #ifdef TARGET_X86_64
             case MO_64:
                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
-                                  cpu_T[0], cpu_regs[R_EAX]);
+                                  cpu_T0, cpu_regs[R_EAX]);
                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
@@ -4842,22 +4766,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 6: /* div */
             switch(ot) {
             case MO_8:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divb_AL(cpu_env, cpu_T[0]);
+                gen_helper_divb_AL(cpu_env, cpu_T0);
                 break;
             case MO_16:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divw_AX(cpu_env, cpu_T[0]);
+                gen_helper_divw_AX(cpu_env, cpu_T0);
                 break;
             default:
             case MO_32:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divl_EAX(cpu_env, cpu_T[0]);
+                gen_helper_divl_EAX(cpu_env, cpu_T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divq_EAX(cpu_env, cpu_T[0]);
+                gen_helper_divq_EAX(cpu_env, cpu_T0);
                 break;
 #endif
             }
@@ -4865,28 +4785,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 7: /* idiv */
             switch(ot) {
             case MO_8:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivb_AL(cpu_env, cpu_T[0]);
+                gen_helper_idivb_AL(cpu_env, cpu_T0);
                 break;
             case MO_16:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivw_AX(cpu_env, cpu_T[0]);
+                gen_helper_idivw_AX(cpu_env, cpu_T0);
                 break;
             default:
             case MO_32:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivl_EAX(cpu_env, cpu_T[0]);
+                gen_helper_idivl_EAX(cpu_env, cpu_T0);
                 break;
 #ifdef TARGET_X86_64
             case MO_64:
-                gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivq_EAX(cpu_env, cpu_T[0]);
+                gen_helper_idivq_EAX(cpu_env, cpu_T0);
                 break;
 #endif
             }
             break;
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
@@ -4899,7 +4815,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
         if (op >= 2 && b == 0xfe) {
-            goto illegal_op;
+            goto unknown_op;
         }
         if (CODE64(s)) {
             if (op == 2 || op == 4) {
@@ -4915,9 +4831,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             if (op >= 2 && op != 3 && op != 5)
-                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T[0], rm);
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
 
         switch(op) {
@@ -4938,29 +4854,28 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 2: /* call Ev */
             /* XXX: optimize if memory (no 'and' is necessary) */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
             }
             next_eip = s->pc - s->cs_base;
-            tcg_gen_movi_tl(cpu_T[1], next_eip);
-            gen_push_v(s, cpu_T[1]);
-            gen_op_jmp_v(cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T1, next_eip);
+            gen_push_v(s, cpu_T1);
+            gen_op_jmp_v(cpu_T0);
+            gen_bnd_jmp(s);
             gen_eob(s);
             break;
         case 3: /* lcall Ev */
-            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
         do_lcall:
             if (s->pe && !s->vm86) {
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                            tcg_const_i32(dflag - 1),
-                                           tcg_const_i32(s->pc - pc_start));
+                                           tcg_const_tl(s->pc - s->cs_base));
             } else {
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T[1],
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
                                       tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
@@ -4968,33 +4883,32 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
         case 4: /* jmp Ev */
             if (dflag == MO_16) {
-                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
             }
-            gen_op_jmp_v(cpu_T[0]);
+            gen_op_jmp_v(cpu_T0);
+            gen_bnd_jmp(s);
             gen_eob(s);
             break;
         case 5: /* ljmp Ev */
-            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
             gen_add_A0_im(s, 1 << ot);
-            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
         do_ljmp:
             if (s->pe && !s->vm86) {
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
-                                          tcg_const_i32(s->pc - pc_start));
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
+                                          tcg_const_tl(s->pc - s->cs_base));
             } else {
                 gen_op_movl_seg_T0_vm(R_CS);
-                gen_op_jmp_v(cpu_T[1]);
+                gen_op_jmp_v(cpu_T1);
             }
             gen_eob(s);
             break;
         case 6: /* push Ev */
-            gen_push_v(s, cpu_T[0]);
+            gen_push_v(s, cpu_T0);
             break;
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
@@ -5006,7 +4920,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_v_reg(ot, cpu_T[1], reg);
+        gen_op_mov_v_reg(ot, cpu_T1, reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5016,8 +4930,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         ot = mo_b_d(b, dflag);
         val = insn_get(env, s, ot);
 
-        gen_op_mov_v_reg(ot, cpu_T[0], OR_EAX);
-        tcg_gen_movi_tl(cpu_T[1], val);
+        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
+        tcg_gen_movi_tl(cpu_T1, val);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
         break;
@@ -5026,20 +4940,20 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
-            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T[0]);
+            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
+            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
-            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T[0]);
+            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
+            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_8, cpu_T[0], R_EAX);
-            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
+            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
+            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
+            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
             break;
         default:
             tcg_abort();
@@ -5049,22 +4963,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         switch (dflag) {
 #ifdef TARGET_X86_64
         case MO_64:
-            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EAX);
-            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
-            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T[0]);
+            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
+            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
+            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
             break;
 #endif
         case MO_32:
-            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
-            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
-            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
-            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T[0]);
+            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
+            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
+            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
+            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
             break;
         case MO_16:
-            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
-            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
-            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
-            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
+            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
+            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
+            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
             break;
         default:
             tcg_abort();
@@ -5083,25 +4997,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
-            tcg_gen_movi_tl(cpu_T[1], val);
+            tcg_gen_movi_tl(cpu_T1, val);
         } else if (b == 0x6b) {
             val = (int8_t)insn_get(env, s, MO_8);
-            tcg_gen_movi_tl(cpu_T[1], val);
+            tcg_gen_movi_tl(cpu_T1, val);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T[1], reg);
+            gen_op_mov_v_reg(ot, cpu_T1, reg);
         }
         switch (ot) {
 #ifdef TARGET_X86_64
         case MO_64:
-            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
+            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
             break;
 #endif
         case MO_32:
-            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
             tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                               cpu_tmp2_i32, cpu_tmp3_i32);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
@@ -5111,14 +5025,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
             break;
         default:
-            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
-            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
+            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
             /* XXX: use 32 bit mul which could be faster */
-            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
-            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
+            gen_op_mov_reg_v(ot, reg, cpu_T0);
             break;
         }
         set_cc_op(s, CC_OP_MULB + ot);
@@ -5131,18 +5045,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(ot, cpu_T[0], reg);
-            gen_op_mov_v_reg(ot, cpu_T[1], rm);
-            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
-            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(ot, cpu_T1, rm);
+            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_op_mov_reg_v(ot, reg, cpu_T1);
+            gen_op_mov_reg_v(ot, rm, cpu_T0);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, cpu_T[0], reg);
-            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
-            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
-            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
-            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
+            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
+            gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
         gen_op_update2_cc();
         set_cc_op(s, CC_OP_ADDB + ot);
@@ -5212,8 +5126,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (dflag == MO_64) {
             if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                 goto illegal_op;
-            gen_jmp_im(pc_start - s->cs_base);
-            gen_update_cc_op(s);
             gen_lea_modrm(env, s, modrm);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
@@ -5221,8 +5133,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
-            gen_jmp_im(pc_start - s->cs_base);
-            gen_update_cc_op(s);
             gen_lea_modrm(env, s, modrm);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
@@ -5232,14 +5142,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         /**************************/
         /* push/pop */
     case 0x50 ... 0x57: /* push */
-        gen_op_mov_v_reg(MO_32, cpu_T[0], (b & 7) | REX_B(s));
-        gen_push_v(s, cpu_T[0]);
+        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
+        gen_push_v(s, cpu_T0);
         break;
     case 0x58 ... 0x5f: /* pop */
         ot = gen_pop_T0(s);
         /* NOTE: order is important for pop %sp */
         gen_pop_update(s, ot);
-        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T[0]);
+        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
         break;
     case 0x60: /* pusha */
         if (CODE64(s))
@@ -5258,8 +5168,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             val = insn_get(env, s, ot);
         else
             val = (int8_t)insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T[0], val);
-        gen_push_v(s, cpu_T[0]);
+        tcg_gen_movi_tl(cpu_T0, val);
+        gen_push_v(s, cpu_T0);
         break;
     case 0x8f: /* pop Ev */
         modrm = cpu_ldub_code(env, s->pc++);
@@ -5269,7 +5179,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             /* NOTE: order is important for pop %sp */
             gen_pop_update(s, ot);
             rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+            gen_op_mov_reg_v(ot, rm, cpu_T0);
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
@@ -5288,20 +5198,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
     case 0xc9: /* leave */
-        /* XXX: exception not precise (ESP is updated before potential exception) */
-        if (CODE64(s)) {
-            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EBP);
-            gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[0]);
-        } else if (s->ss32) {
-            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
-            gen_op_mov_reg_v(MO_32, R_ESP, cpu_T[0]);
-        } else {
-            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EBP);
-            gen_op_mov_reg_v(MO_16, R_ESP, cpu_T[0]);
-        }
-        ot = gen_pop_T0(s);
-        gen_op_mov_reg_v(ot, R_EBP, cpu_T[0]);
-        gen_pop_update(s, ot);
+        gen_leave(s);
         break;
     case 0x06: /* push es */
     case 0x0e: /* push cs */
@@ -5310,12 +5207,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         gen_op_movl_T0_seg(b >> 3);
-        gen_push_v(s, cpu_T[0]);
+        gen_push_v(s, cpu_T0);
         break;
     case 0x1a0: /* push fs */
     case 0x1a8: /* push gs */
         gen_op_movl_T0_seg((b >> 3) & 7);
-        gen_push_v(s, cpu_T[0]);
+        gen_push_v(s, cpu_T0);
         break;
     case 0x07: /* pop es */
     case 0x17: /* pop ss */
@@ -5324,25 +5221,23 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         reg = b >> 3;
         ot = gen_pop_T0(s);
-        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
+        gen_movl_seg_T0(s, reg);
         gen_pop_update(s, ot);
-        if (reg == R_SS) {
-            /* if reg == SS, inhibit interrupts/trace. */
-            /* If several instructions disable interrupts, only the
-               _first_ does it */
-            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                gen_helper_set_inhibit_irq(cpu_env);
-            s->tf = 0;
-        }
+        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
-            gen_eob(s);
+            if (reg == R_SS) {
+                s->tf = 0;
+                gen_eob_inhibit_irq(s, true);
+            } else {
+                gen_eob(s);
+            }
         }
         break;
     case 0x1a1: /* pop fs */
     case 0x1a9: /* pop gs */
         ot = gen_pop_T0(s);
-        gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
+        gen_movl_seg_T0(s, (b >> 3) & 7);
         gen_pop_update(s, ot);
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
@@ -5371,11 +5266,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_lea_modrm(env, s, modrm);
         }
         val = insn_get(env, s, ot);
-        tcg_gen_movi_tl(cpu_T[0], val);
+        tcg_gen_movi_tl(cpu_T0, val);
         if (mod != 3) {
-            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
         } else {
-            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T[0]);
+            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T0);
         }
         break;
     case 0x8a:
@@ -5385,7 +5280,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+        gen_op_mov_reg_v(ot, reg, cpu_T0);
         break;
     case 0x8e: /* mov seg, Gv */
         modrm = cpu_ldub_code(env, s->pc++);
@@ -5393,18 +5288,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
-        if (reg == R_SS) {
-            /* if reg == SS, inhibit interrupts/trace */
-            /* If several instructions disable interrupts, only the
-               _first_ does it */
-            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                gen_helper_set_inhibit_irq(cpu_env);
-            s->tf = 0;
-        }
+        gen_movl_seg_T0(s, reg);
+        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
-            gen_eob(s);
+            if (reg == R_SS) {
+                s->tf = 0;
+                gen_eob_inhibit_irq(s, true);
+            } else {
+                gen_eob(s);
+            }
         }
         break;
     case 0x8c: /* mov Gv, seg */
@@ -5439,45 +5332,42 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(ot, cpu_T[0], rm);
+                gen_op_mov_v_reg(ot, cpu_T0, rm);
                 switch (s_ot) {
                 case MO_UB:
-                    tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+                    tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
                     break;
                 case MO_SB:
-                    tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+                    tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
                     break;
                 case MO_UW:
-                    tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
                     break;
                 default:
                 case MO_SW:
-                    tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+                    tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
                     break;
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
+                gen_op_ld_v(s, s_ot, cpu_T0, cpu_A0);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             }
         }
         break;
 
     case 0x8d: /* lea */
-        ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
-        /* we must ensure that no segment is added */
-        s->override = -1;
-        val = s->addseg;
-        s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
-        s->addseg = val;
-        gen_op_mov_reg_v(ot, reg, cpu_A0);
+        {
+            AddressParts a = gen_lea_modrm_0(env, s, modrm);
+            TCGv ea = gen_lea_modrm_1(a);
+            gen_op_mov_reg_v(dflag, reg, ea);
+        }
         break;
 
     case 0xa0: /* mov EAX, Ov */
@@ -5502,27 +5392,27 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_movi_tl(cpu_A0, offset_addr);
             gen_add_A0_ds_seg(s);
             if ((b & 2) == 0) {
-                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
-                gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
+                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+                gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
             } else {
-                gen_op_mov_v_reg(ot, cpu_T[0], R_EAX);
-                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_mov_v_reg(ot, cpu_T0, R_EAX);
+                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
             }
         }
         break;
     case 0xd7: /* xlat */
         tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
-        tcg_gen_ext8u_tl(cpu_T[0], cpu_regs[R_EAX]);
-        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
+        tcg_gen_ext8u_tl(cpu_T0, cpu_regs[R_EAX]);
+        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T0);
         gen_extu(s->aflag, cpu_A0);
         gen_add_A0_ds_seg(s);
-        gen_op_ld_v(s, MO_8, cpu_T[0], cpu_A0);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
+        gen_op_ld_v(s, MO_8, cpu_T0, cpu_A0);
+        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(env, s, MO_8);
-        tcg_gen_movi_tl(cpu_T[0], val);
-        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T[0]);
+        tcg_gen_movi_tl(cpu_T0, val);
+        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T0);
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
 #ifdef TARGET_X86_64
@@ -5532,16 +5422,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tmp = cpu_ldq_code(env, s->pc);
             s->pc += 8;
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T[0], tmp);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T0, tmp);
+            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
         } else
 #endif
         {
             ot = dflag;
             val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
-            tcg_gen_movi_tl(cpu_T[0], val);
-            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T0, val);
+            gen_op_mov_reg_v(ot, reg, cpu_T0);
         }
         break;
 
@@ -5560,21 +5450,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod == 3) {
             rm = (modrm & 7) | REX_B(s);
         do_xchg_reg:
-            gen_op_mov_v_reg(ot, cpu_T[0], reg);
-            gen_op_mov_v_reg(ot, cpu_T[1], rm);
-            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
-            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+            gen_op_mov_v_reg(ot, cpu_T0, reg);
+            gen_op_mov_v_reg(ot, cpu_T1, rm);
+            gen_op_mov_reg_v(ot, rm, cpu_T0);
+            gen_op_mov_reg_v(ot, reg, cpu_T1);
         } else {
             gen_lea_modrm(env, s, modrm);
-            gen_op_mov_v_reg(ot, cpu_T[0], reg);
+            gen_op_mov_v_reg(ot, cpu_T0, reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
                 gen_helper_lock();
-            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
-            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
+            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
             if (!(prefixes & PREFIX_LOCK))
                 gen_helper_unlock();
-            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+            gen_op_mov_reg_v(ot, reg, cpu_T1);
         }
         break;
     case 0xc4: /* les Gv */
@@ -5601,13 +5491,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
-        gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
+        gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
-        gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
-        gen_movl_seg_T0(s, op, pc_start - s->cs_base);
+        gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
+        gen_movl_seg_T0(s, op);
         /* then put the data */
-        gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+        gen_op_mov_reg_v(ot, reg, cpu_T1);
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -5686,7 +5576,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             opreg = rm;
         }
-        gen_op_mov_v_reg(ot, cpu_T[1], reg);
+        gen_op_mov_v_reg(ot, cpu_T1, reg);
 
         if (shift) {
             TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
@@ -5837,8 +5727,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             case 0x0c: /* fldenv mem */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0d: /* fldcw mem */
@@ -5847,8 +5735,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x0f: /* fnstcw mem */
@@ -5857,24 +5743,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x1d: /* fldt mem */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fldt_ST0(cpu_env, cpu_A0);
                 break;
             case 0x1f: /* fstpt mem */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fstt_ST0(cpu_env, cpu_A0);
                 gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2e: /* fnsave mem */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
                 break;
             case 0x2f: /* fnstsw mem */
@@ -5883,13 +5761,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                     s->mem_index, MO_LEUW);
                 break;
             case 0x3c: /* fbld */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fbld_ST0(cpu_env, cpu_A0);
                 break;
             case 0x3e: /* fbstp */
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
                 gen_helper_fbst_ST0(cpu_env, cpu_A0);
                 gen_helper_fpop(cpu_env);
                 break;
@@ -5903,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fpop(cpu_env);
                 break;
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
         } else {
             /* register float ops */
@@ -5924,12 +5798,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 switch(rm) {
                 case 0: /* fnop */
                     /* check exceptions (FreeBSD FPU probe) */
-                    gen_update_cc_op(s);
-                    gen_jmp_im(pc_start - s->cs_base);
                     gen_helper_fwait(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x0c: /* grp d9/4 */
@@ -5948,7 +5820,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fxam_ST0(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x0d: /* grp d9/5 */
@@ -5983,7 +5855,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         gen_helper_fldz_ST0(cpu_env);
                         break;
                     default:
-                        goto illegal_op;
+                        goto unknown_op;
                     }
                 }
                 break;
@@ -6083,7 +5955,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fpop(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x1c:
@@ -6101,7 +5973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 case 4: /* fsetpm (287 only, just do nop here) */
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x1d: /* fucomi */
@@ -6153,7 +6025,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fpop(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x38: /* ffreep sti, undocumented op */
@@ -6164,11 +6036,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 switch(rm) {
                 case 0:
                     gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
-                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
-                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
+                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
+                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x3d: /* fucomip */
@@ -6214,7 +6086,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
         }
         break;
@@ -6275,7 +6147,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x6c: /* insS */
     case 0x6d:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base, 
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6290,7 +6162,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x6e: /* outsS */
     case 0x6f:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -6310,15 +6182,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xe5:
         ot = mo_b_d32(b, dflag);
         val = cpu_ldub_code(env, s->pc++);
-        tcg_gen_movi_tl(cpu_T[0], val);
+        tcg_gen_movi_tl(cpu_T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_start();
        }
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
+        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
+        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6328,17 +6201,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xe7:
         ot = mo_b_d32(b, dflag);
         val = cpu_ldub_code(env, s->pc++);
-        tcg_gen_movi_tl(cpu_T[0], val);
+        tcg_gen_movi_tl(cpu_T0, val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
+        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
 
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_start();
        }
         tcg_gen_movi_i32(cpu_tmp2_i32, val);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6347,15 +6221,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xec:
     case 0xed:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_start();
        }
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
-        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        gen_helper_in_func(ot, cpu_T1, cpu_tmp2_i32);
+        gen_op_mov_reg_v(ot, R_EAX, cpu_T1);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6364,17 +6239,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0xee:
     case 0xef:
         ot = mo_b_d32(b, dflag);
-        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
+        tcg_gen_ext16u_tl(cpu_T0, cpu_regs[R_EDX]);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
-        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
+        gen_op_mov_v_reg(ot, cpu_T1, R_EAX);
 
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_start();
        }
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
+        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
+        gen_bpt_io(s, cpu_tmp2_i32, ot);
         if (s->tb->cflags & CF_USE_ICOUNT) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6389,14 +6265,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         ot = gen_pop_T0(s);
         gen_stack_update(s, val + (1 << ot));
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T[0]);
+        gen_op_jmp_v(cpu_T0);
+        gen_bnd_jmp(s);
         gen_eob(s);
         break;
     case 0xc3: /* ret */
         ot = gen_pop_T0(s);
         gen_pop_update(s, ot);
         /* Note that gen_pop_T0 uses a zero-extending load.  */
-        gen_op_jmp_v(cpu_T[0]);
+        gen_op_jmp_v(cpu_T0);
+        gen_bnd_jmp(s);
         gen_eob(s);
         break;
     case 0xca: /* lret im */
@@ -6411,13 +6289,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             gen_stack_A0(s);
             /* pop offset */
-            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
             /* NOTE: keeping EIP updated is not a problem in case of
                exception */
-            gen_op_jmp_v(cpu_T[0]);
+            gen_op_jmp_v(cpu_T0);
             /* pop selector */
-            gen_op_addl_A0_im(1 << dflag);
-            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
+            gen_add_A0_im(s, 1 << dflag);
+            gen_op_ld_v(s, dflag, cpu_T0, cpu_A0);
             gen_op_movl_seg_T0_vm(R_CS);
             /* add stack offset */
             gen_stack_update(s, val + (2 << dflag));
@@ -6441,8 +6319,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 set_cc_op(s, CC_OP_EFLAGS);
             }
         } else {
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
             set_cc_op(s, CC_OP_EFLAGS);
@@ -6463,8 +6339,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else if (!CODE64(s)) {
                 tval &= 0xffffffff;
             }
-            tcg_gen_movi_tl(cpu_T[0], next_eip);
-            gen_push_v(s, cpu_T[0]);
+            tcg_gen_movi_tl(cpu_T0, next_eip);
+            gen_push_v(s, cpu_T0);
+            gen_bnd_jmp(s);
             gen_jmp(s, tval);
         }
         break;
@@ -6478,8 +6355,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T[0], selector);
-            tcg_gen_movi_tl(cpu_T[1], offset);
+            tcg_gen_movi_tl(cpu_T0, selector);
+            tcg_gen_movi_tl(cpu_T1, offset);
         }
         goto do_lcall;
     case 0xe9: /* jmp im */
@@ -6494,6 +6371,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else if (!CODE64(s)) {
             tval &= 0xffffffff;
         }
+        gen_bnd_jmp(s);
         gen_jmp(s, tval);
         break;
     case 0xea: /* ljmp im */
@@ -6506,8 +6384,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             offset = insn_get(env, s, ot);
             selector = insn_get(env, s, MO_16);
 
-            tcg_gen_movi_tl(cpu_T[0], selector);
-            tcg_gen_movi_tl(cpu_T[1], offset);
+            tcg_gen_movi_tl(cpu_T0, selector);
+            tcg_gen_movi_tl(cpu_T1, offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
@@ -6533,12 +6411,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (dflag == MO_16) {
             tval &= 0xffff;
         }
+        gen_bnd_jmp(s);
         gen_jcc(s, b, tval, next_eip);
         break;
 
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
-        gen_setcc1(s, b, cpu_T[0]);
+        gen_setcc1(s, b, cpu_T0);
         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
@@ -6559,8 +6438,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
-            gen_helper_read_eflags(cpu_T[0], cpu_env);
-            gen_push_v(s, cpu_T[0]);
+            gen_helper_read_eflags(cpu_T0, cpu_env);
+            gen_push_v(s, cpu_T0);
         }
         break;
     case 0x9d: /* popf */
@@ -6571,13 +6450,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             ot = gen_pop_T0(s);
             if (s->cpl == 0) {
                 if (dflag != MO_16) {
-                    gen_helper_write_eflags(cpu_env, cpu_T[0],
+                    gen_helper_write_eflags(cpu_env, cpu_T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK |
                                                            IOPL_MASK)));
                 } else {
-                    gen_helper_write_eflags(cpu_env, cpu_T[0],
+                    gen_helper_write_eflags(cpu_env, cpu_T0,
                                             tcg_const_i32((TF_MASK | AC_MASK |
                                                            ID_MASK | NT_MASK |
                                                            IF_MASK | IOPL_MASK)
@@ -6586,14 +6465,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 if (s->cpl <= s->iopl) {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
                                                                NT_MASK |
                                                                IF_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                                 tcg_const_i32((TF_MASK |
                                                                AC_MASK |
                                                                ID_MASK |
@@ -6603,11 +6482,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     }
                 } else {
                     if (dflag != MO_16) {
-                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                        gen_helper_write_eflags(cpu_env, cpu_T0,
                                            tcg_const_i32((TF_MASK | AC_MASK |
                                                           ID_MASK | NT_MASK)
                                                          & 0xffff));
@@ -6624,19 +6503,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x9e: /* sahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_op_mov_v_reg(MO_8, cpu_T[0], R_AH);
+        gen_op_mov_v_reg(MO_8, cpu_T0, R_AH);
         gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
-        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
-        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
+        tcg_gen_andi_tl(cpu_T0, cpu_T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
+        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T0);
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
-        gen_op_mov_reg_v(MO_8, R_AH, cpu_T[0]);
+        tcg_gen_ori_tl(cpu_T0, cpu_cc_src, 0x02);
+        gen_op_mov_reg_v(MO_8, R_AH, cpu_T0);
         break;
     case 0xf5: /* cmc */
         gen_compute_eflags(s);
@@ -6670,15 +6549,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod != 3) {
             s->rip_offset = 1;
             gen_lea_modrm(env, s, modrm);
-            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T[0], rm);
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
         /* load shift */
         val = cpu_ldub_code(env, s->pc++);
-        tcg_gen_movi_tl(cpu_T[1], val);
+        tcg_gen_movi_tl(cpu_T1, val);
         if (op < 4)
-            goto illegal_op;
+            goto unknown_op;
         op -= 4;
         goto bt_op;
     case 0x1a3: /* bt Gv, Ev */
@@ -6698,46 +6577,46 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
-        gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
+        gen_op_mov_v_reg(MO_32, cpu_T1, reg);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             /* specific case: we need to add a displacement */
-            gen_exts(ot, cpu_T[1]);
-            tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
+            gen_exts(ot, cpu_T1);
+            tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot);
             tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
-            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
+            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
         } else {
-            gen_op_mov_v_reg(ot, cpu_T[0], rm);
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
         }
     bt_op:
-        tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
-        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
+        tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1);
+        tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1);
         switch(op) {
         case 0:
             break;
         case 1:
             tcg_gen_movi_tl(cpu_tmp0, 1);
-            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
-            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
             break;
         case 2:
             tcg_gen_movi_tl(cpu_tmp0, 1);
-            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
-            tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+            tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0);
             break;
         default:
         case 3:
             tcg_gen_movi_tl(cpu_tmp0, 1);
-            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
-            tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1);
+            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0);
             break;
         }
         if (op != 0) {
             if (mod != 3) {
-                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
             } else {
-                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                gen_op_mov_reg_v(ot, rm, cpu_T0);
             }
         }
 
@@ -6777,7 +6656,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_extu(ot, cpu_T[0]);
+        gen_extu(ot, cpu_T0);
 
         /* Note that lzcnt and tzcnt are in different extensions.  */
         if ((prefixes & PREFIX_REPZ)
@@ -6785,18 +6664,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
             int size = 8 << ot;
-            tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+            tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
             if (b & 1) {
                 /* For lzcnt, reduce the target_ulong result by the
                    number of zeros that we expect to find at the top.  */
-                gen_helper_clz(cpu_T[0], cpu_T[0]);
-                tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
+                gen_helper_clz(cpu_T0, cpu_T0);
+                tcg_gen_subi_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - size);
             } else {
                 /* For tzcnt, a zero input must return the operand size:
                    force all bits outside the operand size to 1.  */
                 target_ulong mask = (target_ulong)-2 << (size - 1);
-                tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
-                gen_helper_ctz(cpu_T[0], cpu_T[0]);
+                tcg_gen_ori_tl(cpu_T0, cpu_T0, mask);
+                gen_helper_ctz(cpu_T0, cpu_T0);
             }
             /* For lzcnt/tzcnt, C and Z bits are defined and are
                related to the result.  */
@@ -6805,24 +6684,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             /* For bsr/bsf, only the Z bit is defined and it is related
                to the input and not the result.  */
-            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
             set_cc_op(s, CC_OP_LOGICB + ot);
             if (b & 1) {
                 /* For bsr, return the bit index of the first 1 bit,
                    not the count of leading zeros.  */
-                gen_helper_clz(cpu_T[0], cpu_T[0]);
-                tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
+                gen_helper_clz(cpu_T0, cpu_T0);
+                tcg_gen_xori_tl(cpu_T0, cpu_T0, TARGET_LONG_BITS - 1);
             } else {
-                gen_helper_ctz(cpu_T[0], cpu_T[0]);
+                gen_helper_ctz(cpu_T0, cpu_T0);
             }
             /* ??? The manual says that the output is undefined when the
                input is zero, but real hardware leaves it unchanged, and
                real programs appear to depend on that.  */
             tcg_gen_movi_tl(cpu_tmp0, 0);
-            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
-                               cpu_regs[reg], cpu_T[0]);
+            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T0, cpu_cc_dst, cpu_tmp0,
+                               cpu_regs[reg], cpu_T0);
         }
-        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+        gen_op_mov_reg_v(ot, reg, cpu_T0);
         break;
         /************************/
         /* bcd */
@@ -6895,8 +6774,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             (HF_MP_MASK | HF_TS_MASK)) {
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fwait(cpu_env);
         }
         break;
@@ -6946,27 +6823,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
     case 0xfb: /* sti */
-        if (!s->vm86) {
-            if (s->cpl <= s->iopl) {
-            gen_sti:
-                gen_helper_sti(cpu_env);
-                /* interruptions are enabled only the first insn after sti */
-                /* If several instructions disable interrupts, only the
-                   _first_ does it */
-                if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                    gen_helper_set_inhibit_irq(cpu_env);
-                /* give a chance to handle pending irqs */
-                gen_jmp_im(s->pc - s->cs_base);
-                gen_eob(s);
-            } else {
-                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-            }
+        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
+            gen_helper_sti(cpu_env);
+            /* interruptions are enabled only the first insn after sti */
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob_inhibit_irq(s, true);
         } else {
-            if (s->iopl == 3) {
-                goto gen_sti;
-            } else {
-                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-            }
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         }
         break;
     case 0x62: /* bound */
@@ -6978,10 +6841,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_op_mov_v_reg(ot, cpu_T[0], reg);
+        gen_op_mov_v_reg(ot, cpu_T0, reg);
         gen_lea_modrm(env, s, modrm);
-        gen_jmp_im(pc_start - s->cs_base);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
         if (ot == MO_16) {
             gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
         } else {
@@ -6992,24 +6854,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = (b & 7) | REX_B(s);
 #ifdef TARGET_X86_64
         if (dflag == MO_64) {
-            gen_op_mov_v_reg(MO_64, cpu_T[0], reg);
-            tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
+            gen_op_mov_v_reg(MO_64, cpu_T0, reg);
+            tcg_gen_bswap64_i64(cpu_T0, cpu_T0);
+            gen_op_mov_reg_v(MO_64, reg, cpu_T0);
         } else
 #endif
         {
-            gen_op_mov_v_reg(MO_32, cpu_T[0], reg);
-            tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
-            tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
-            gen_op_mov_reg_v(MO_32, reg, cpu_T[0]);
+            gen_op_mov_v_reg(MO_32, cpu_T0, reg);
+            tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
+            tcg_gen_bswap32_tl(cpu_T0, cpu_T0);
+            gen_op_mov_reg_v(MO_32, reg, cpu_T0);
         }
         break;
     case 0xd6: /* salc */
         if (CODE64(s))
             goto illegal_op;
-        gen_compute_eflags_c(s, cpu_T[0]);
-        tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
-        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
+        gen_compute_eflags_c(s, cpu_T0);
+        tcg_gen_neg_tl(cpu_T0, cpu_T0);
+        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T0);
         break;
     case 0xe0: /* loopnz */
     case 0xe1: /* loopz */
@@ -7094,8 +6956,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysenter(cpu_env);
             gen_eob(s);
         }
@@ -7107,8 +6967,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
             gen_eob(s);
         }
@@ -7125,8 +6983,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
             /* condition codes are modified only in long mode */
             if (s->lma) {
@@ -7160,7 +7016,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
-            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
+            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                             offsetof(CPUX86State, ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
@@ -7172,8 +7029,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                gen_jmp_im(pc_start - s->cs_base);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
             }
             break;
@@ -7181,7 +7037,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (!s->pe || s->vm86)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
-            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
+            tcg_gen_ld32u_tl(cpu_T0, cpu_env,
+                             offsetof(CPUX86State, tr.selector));
             ot = mod == 3 ? dflag : MO_16;
             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
@@ -7193,8 +7050,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                gen_jmp_im(pc_start - s->cs_base);
-                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
             }
             break;
@@ -7205,279 +7061,347 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
             gen_update_cc_op(s);
             if (op == 4) {
-                gen_helper_verr(cpu_env, cpu_T[0]);
+                gen_helper_verr(cpu_env, cpu_T0);
             } else {
-                gen_helper_verw(cpu_env, cpu_T[0]);
+                gen_helper_verw(cpu_env, cpu_T0);
             }
             set_cc_op(s, CC_OP_EFLAGS);
             break;
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
+
     case 0x101:
         modrm = cpu_ldub_code(env, s->pc++);
-        mod = (modrm >> 6) & 3;
-        op = (modrm >> 3) & 7;
-        rm = modrm & 7;
-        switch(op) {
-        case 0: /* sgdt */
-            if (mod == 3)
-                goto illegal_op;
+        switch (modrm) {
+        CASE_MODRM_MEM_OP(0): /* sgdt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
             gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
-            gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
+            tcg_gen_ld32u_tl(cpu_T0,
+                             cpu_env, offsetof(CPUX86State, gdt.limit));
+            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
             gen_add_A0_im(s, 2);
-            tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
             if (dflag == MO_16) {
-                tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
+                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
             }
-            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
             break;
-        case 1:
-            if (mod == 3) {
-                switch (rm) {
-                case 0: /* monitor */
-                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
-                        s->cpl != 0)
-                        goto illegal_op;
-                    gen_update_cc_op(s);
-                    gen_jmp_im(pc_start - s->cs_base);
-                    tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
-                    gen_extu(s->aflag, cpu_A0);
-                    gen_add_A0_ds_seg(s);
-                    gen_helper_monitor(cpu_env, cpu_A0);
-                    break;
-                case 1: /* mwait */
-                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
-                        s->cpl != 0)
-                        goto illegal_op;
-                    gen_update_cc_op(s);
-                    gen_jmp_im(pc_start - s->cs_base);
-                    gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
-                    gen_eob(s);
-                    break;
-                case 2: /* clac */
-                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
-                        s->cpl != 0) {
-                        goto illegal_op;
-                    }
-                    gen_helper_clac(cpu_env);
-                    gen_jmp_im(s->pc - s->cs_base);
-                    gen_eob(s);
-                    break;
-                case 3: /* stac */
-                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
-                        s->cpl != 0) {
-                        goto illegal_op;
-                    }
-                    gen_helper_stac(cpu_env);
-                    gen_jmp_im(s->pc - s->cs_base);
-                    gen_eob(s);
-                    break;
-                default:
-                    goto illegal_op;
-                }
-            } else { /* sidt */
-                gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
-                gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
-                gen_add_A0_im(s, 2);
-                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
-                if (dflag == MO_16) {
-                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
-                }
-                gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
+
+        case 0xc8: /* monitor */
+            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
+                goto illegal_op;
             }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
+            gen_extu(s->aflag, cpu_A0);
+            gen_add_A0_ds_seg(s);
+            gen_helper_monitor(cpu_env, cpu_A0);
             break;
-        case 2: /* lgdt */
-        case 3: /* lidt */
-            if (mod == 3) {
-                gen_update_cc_op(s);
-                gen_jmp_im(pc_start - s->cs_base);
-                switch(rm) {
-                case 0: /* VMRUN */
-                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
-                        goto illegal_op;
-                    if (s->cpl != 0) {
-                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        break;
-                    } else {
-                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
-                                         tcg_const_i32(s->pc - pc_start));
-                        tcg_gen_exit_tb(0);
-                        s->is_jmp = DISAS_TB_JUMP;
-                    }
-                    break;
-                case 1: /* VMMCALL */
-                    if (!(s->flags & HF_SVME_MASK))
-                        goto illegal_op;
-                    gen_helper_vmmcall(cpu_env);
-                    break;
-                case 2: /* VMLOAD */
-                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
-                        goto illegal_op;
-                    if (s->cpl != 0) {
-                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        break;
-                    } else {
-                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
-                    }
-                    break;
-                case 3: /* VMSAVE */
-                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
-                        goto illegal_op;
-                    if (s->cpl != 0) {
-                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        break;
-                    } else {
-                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
-                    }
-                    break;
-                case 4: /* STGI */
-                    if ((!(s->flags & HF_SVME_MASK) &&
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
-                        !s->pe)
-                        goto illegal_op;
-                    if (s->cpl != 0) {
-                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        break;
-                    } else {
-                        gen_helper_stgi(cpu_env);
-                    }
-                    break;
-                case 5: /* CLGI */
-                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
-                        goto illegal_op;
-                    if (s->cpl != 0) {
-                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        break;
-                    } else {
-                        gen_helper_clgi(cpu_env);
-                    }
-                    break;
-                case 6: /* SKINIT */
-                    if ((!(s->flags & HF_SVME_MASK) && 
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
-                        !s->pe)
-                        goto illegal_op;
-                    gen_helper_skinit(cpu_env);
-                    break;
-                case 7: /* INVLPGA */
-                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
-                        goto illegal_op;
-                    if (s->cpl != 0) {
-                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        break;
-                    } else {
-                        gen_helper_invlpga(cpu_env,
-                                           tcg_const_i32(s->aflag - 1));
-                    }
-                    break;
-                default:
-                    goto illegal_op;
-                }
-            } else if (s->cpl != 0) {
-                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-            } else {
-                gen_svm_check_intercept(s, pc_start,
-                                        op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
-                gen_add_A0_im(s, 2);
-                gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
-                if (dflag == MO_16) {
-                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
-                }
-                if (op == 2) {
-                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
-                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
-                } else {
-                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base));
-                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit));
-                }
+
+        case 0xc9: /* mwait */
+            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
+                goto illegal_op;
             }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
+            gen_eob(s);
             break;
-        case 4: /* smsw */
-            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
-#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
-            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4);
-#else
-            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
-#endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+
+        case 0xca: /* clac */
+            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
+                || s->cpl != 0) {
+                goto illegal_op;
+            }
+            gen_helper_clac(cpu_env);
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
             break;
-        case 6: /* lmsw */
-            if (s->cpl != 0) {
-                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-            } else {
-                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
-                gen_helper_lmsw(cpu_env, cpu_T[0]);
-                gen_jmp_im(s->pc - s->cs_base);
-                gen_eob(s);
+
+        case 0xcb: /* stac */
+            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
+                || s->cpl != 0) {
+                goto illegal_op;
             }
+            gen_helper_stac(cpu_env);
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
             break;
-        case 7:
-            if (mod != 3) { /* invlpg */
+
+        CASE_MODRM_MEM_OP(1): /* sidt */
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
+            gen_op_st_v(s, MO_16, cpu_T0, cpu_A0);
+            gen_add_A0_im(s, 2);
+            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            if (dflag == MO_16) {
+                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+            }
+            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            break;
+
+        case 0xd0: /* xgetbv */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xgetbv(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            break;
+
+        case 0xd1: /* xsetbv */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_xsetbv(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            /* End TB because translation flags may change.  */
+            gen_jmp_im(s->pc - pc_start);
+            gen_eob(s);
+            break;
+
+        case 0xd8: /* VMRUN */
+            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
+                             tcg_const_i32(s->pc - pc_start));
+            tcg_gen_exit_tb(0);
+            s->is_jmp = DISAS_TB_JUMP;
+            break;
+
+        case 0xd9: /* VMMCALL */
+            if (!(s->flags & HF_SVME_MASK)) {
+                goto illegal_op;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_vmmcall(cpu_env);
+            break;
+
+        case 0xda: /* VMLOAD */
+            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
+            break;
+
+        case 0xdb: /* VMSAVE */
+            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
+            break;
+
+        case 0xdc: /* STGI */
+            if ((!(s->flags & HF_SVME_MASK)
+                   && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
+                || !s->pe) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_stgi(cpu_env);
+            break;
+
+        case 0xdd: /* CLGI */
+            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_clgi(cpu_env);
+            break;
+
+        case 0xde: /* SKINIT */
+            if ((!(s->flags & HF_SVME_MASK)
+                 && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
+                || !s->pe) {
+                goto illegal_op;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_skinit(cpu_env);
+            break;
+
+        case 0xdf: /* INVLPGA */
+            if (!(s->flags & HF_SVME_MASK) || !s->pe) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
+            break;
+
+        CASE_MODRM_MEM_OP(2): /* lgdt */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
+            gen_lea_modrm(env, s, modrm);
+            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_add_A0_im(s, 2);
+            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            if (dflag == MO_16) {
+                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+            }
+            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, gdt.base));
+            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
+            break;
+
+        CASE_MODRM_MEM_OP(3): /* lidt */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
+            gen_lea_modrm(env, s, modrm);
+            gen_op_ld_v(s, MO_16, cpu_T1, cpu_A0);
+            gen_add_A0_im(s, 2);
+            gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
+            if (dflag == MO_16) {
+                tcg_gen_andi_tl(cpu_T0, cpu_T0, 0xffffff);
+            }
+            tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.base));
+            tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
+            break;
+
+        CASE_MODRM_OP(4): /* smsw */
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
+            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
+            if (CODE64(s)) {
+                mod = (modrm >> 6) & 3;
+                ot = (mod != 3 ? MO_16 : s->dflag);
+            } else {
+                ot = MO_16;
+            }
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            break;
+        case 0xee: /* rdpkru */
+            if (prefixes & PREFIX_LOCK) {
+                goto illegal_op;
+            }
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_rdpkru(cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], cpu_tmp1_i64);
+            break;
+        case 0xef: /* wrpkru */
+            if (prefixes & PREFIX_LOCK) {
+                goto illegal_op;
+            }
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_ECX]);
+            gen_helper_wrpkru(cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            break;
+        CASE_MODRM_OP(6): /* lmsw */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_helper_lmsw(cpu_env, cpu_T0);
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+            break;
+
+        CASE_MODRM_MEM_OP(7): /* invlpg */
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            gen_lea_modrm(env, s, modrm);
+            gen_helper_invlpg(cpu_env, cpu_A0);
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
+            break;
+
+        case 0xf8: /* swapgs */
+#ifdef TARGET_X86_64
+            if (CODE64(s)) {
                 if (s->cpl != 0) {
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
-                    gen_update_cc_op(s);
-                    gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
-                    gen_helper_invlpg(cpu_env, cpu_A0);
-                    gen_jmp_im(s->pc - s->cs_base);
-                    gen_eob(s);
+                    tcg_gen_mov_tl(cpu_T0, cpu_seg_base[R_GS]);
+                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
+                                  offsetof(CPUX86State, kernelgsbase));
+                    tcg_gen_st_tl(cpu_T0, cpu_env,
+                                  offsetof(CPUX86State, kernelgsbase));
                 }
-            } else {
-                switch (rm) {
-                case 0: /* swapgs */
-#ifdef TARGET_X86_64
-                    if (CODE64(s)) {
-                        if (s->cpl != 0) {
-                            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-                        } else {
-                            tcg_gen_ld_tl(cpu_T[0], cpu_env,
-                                offsetof(CPUX86State,segs[R_GS].base));
-                            tcg_gen_ld_tl(cpu_T[1], cpu_env,
-                                offsetof(CPUX86State,kernelgsbase));
-                            tcg_gen_st_tl(cpu_T[1], cpu_env,
-                                offsetof(CPUX86State,segs[R_GS].base));
-                            tcg_gen_st_tl(cpu_T[0], cpu_env,
-                                offsetof(CPUX86State,kernelgsbase));
-                        }
-                    } else
+                break;
+            }
 #endif
-                    {
-                        goto illegal_op;
-                    }
-                    break;
-                case 1: /* rdtscp */
-                    if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
-                        goto illegal_op;
-                    gen_update_cc_op(s);
-                    gen_jmp_im(pc_start - s->cs_base);
-                    if (s->tb->cflags & CF_USE_ICOUNT) {
-                        gen_io_start();
-                   }
-                    gen_helper_rdtscp(cpu_env);
-                    if (s->tb->cflags & CF_USE_ICOUNT) {
-                        gen_io_end();
-                        gen_jmp(s, s->pc - s->cs_base);
-                    }
-                    break;
-                default:
-                    goto illegal_op;
-                }
+            goto illegal_op;
+
+        case 0xf9: /* rdtscp */
+            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
+                goto illegal_op;
+            }
+            gen_update_cc_op(s);
+            gen_jmp_im(pc_start - s->cs_base);
+            if (s->tb->cflags & CF_USE_ICOUNT) {
+                gen_io_start();
+            }
+            gen_helper_rdtscp(cpu_env);
+            if (s->tb->cflags & CF_USE_ICOUNT) {
+                gen_io_end();
+                gen_jmp(s, s->pc - s->cs_base);
             }
             break;
+
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
+
     case 0x108: /* invd */
     case 0x109: /* wbinvd */
         if (s->cpl != 0) {
@@ -7500,16 +7424,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             rm = (modrm & 7) | REX_B(s);
 
             if (mod == 3) {
-                gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
+                gen_op_mov_v_reg(MO_32, cpu_T0, rm);
                 /* sign extend */
                 if (d_ot == MO_64) {
-                    tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                    tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
                 }
-                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             } else {
                 gen_lea_modrm(env, s, modrm);
-                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
-                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
+                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T0, cpu_A0);
+                gen_op_mov_reg_v(d_ot, reg, cpu_T0);
             }
         } else
 #endif
@@ -7574,9 +7498,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
-                gen_helper_lar(t0, cpu_env, cpu_T[0]);
+                gen_helper_lar(t0, cpu_env, cpu_T0);
             } else {
-                gen_helper_lsl(t0, cpu_env, cpu_T[0]);
+                gen_helper_lsl(t0, cpu_env, cpu_T0);
             }
             tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
@@ -7598,7 +7522,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_nop_modrm(env, s, modrm);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7606,7 +7530,199 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
         }
         break;
-    case 0x119 ... 0x11f: /* nop (multi byte) */
+    case 0x11a:
+        modrm = cpu_ldub_code(env, s->pc++);
+        if (s->flags & HF_MPX_EN_MASK) {
+            mod = (modrm >> 6) & 3;
+            reg = ((modrm >> 3) & 7) | rex_r;
+            if (prefixes & PREFIX_REPZ) {
+                /* bndcl */
+                if (reg >= 4
+                    || (prefixes & PREFIX_LOCK)
+                    || s->aflag == MO_16) {
+                    goto illegal_op;
+                }
+                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
+            } else if (prefixes & PREFIX_REPNZ) {
+                /* bndcu */
+                if (reg >= 4
+                    || (prefixes & PREFIX_LOCK)
+                    || s->aflag == MO_16) {
+                    goto illegal_op;
+                }
+                TCGv_i64 notu = tcg_temp_new_i64();
+                tcg_gen_not_i64(notu, cpu_bndu[reg]);
+                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
+                tcg_temp_free_i64(notu);
+            } else if (prefixes & PREFIX_DATA) {
+                /* bndmov -- from reg/mem */
+                if (reg >= 4 || s->aflag == MO_16) {
+                    goto illegal_op;
+                }
+                if (mod == 3) {
+                    int reg2 = (modrm & 7) | REX_B(s);
+                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
+                        goto illegal_op;
+                    }
+                    if (s->flags & HF_MPX_IU_MASK) {
+                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
+                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
+                    }
+                } else {
+                    gen_lea_modrm(env, s, modrm);
+                    if (CODE64(s)) {
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                                            s->mem_index, MO_LEQ);
+                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                                            s->mem_index, MO_LEQ);
+                    } else {
+                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], cpu_A0,
+                                            s->mem_index, MO_LEUL);
+                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
+                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], cpu_A0,
+                                            s->mem_index, MO_LEUL);
+                    }
+                    /* bnd registers are now in-use */
+                    gen_set_hflag(s, HF_MPX_IU_MASK);
+                }
+            } else if (mod != 3) {
+                /* bndldx */
+                AddressParts a = gen_lea_modrm_0(env, s, modrm);
+                if (reg >= 4
+                    || (prefixes & PREFIX_LOCK)
+                    || s->aflag == MO_16
+                    || a.base < -1) {
+                    goto illegal_op;
+                }
+                if (a.base >= 0) {
+                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                } else {
+                    tcg_gen_movi_tl(cpu_A0, 0);
+                }
+                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                if (a.index >= 0) {
+                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                } else {
+                    tcg_gen_movi_tl(cpu_T0, 0);
+                }
+                if (CODE64(s)) {
+                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, cpu_A0, cpu_T0);
+                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
+                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
+                } else {
+                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, cpu_A0, cpu_T0);
+                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
+                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
+                }
+                gen_set_hflag(s, HF_MPX_IU_MASK);
+            }
+        }
+        gen_nop_modrm(env, s, modrm);
+        break;
+    case 0x11b:
+        modrm = cpu_ldub_code(env, s->pc++);
+        if (s->flags & HF_MPX_EN_MASK) {
+            mod = (modrm >> 6) & 3;
+            reg = ((modrm >> 3) & 7) | rex_r;
+            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
+                /* bndmk */
+                if (reg >= 4
+                    || (prefixes & PREFIX_LOCK)
+                    || s->aflag == MO_16) {
+                    goto illegal_op;
+                }
+                AddressParts a = gen_lea_modrm_0(env, s, modrm);
+                if (a.base >= 0) {
+                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
+                    if (!CODE64(s)) {
+                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
+                    }
+                } else if (a.base == -1) {
+                    /* no base register has lower bound of 0 */
+                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
+                } else {
+                    /* rip-relative generates #ud */
+                    goto illegal_op;
+                }
+                tcg_gen_not_tl(cpu_A0, gen_lea_modrm_1(a));
+                if (!CODE64(s)) {
+                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
+                }
+                tcg_gen_extu_tl_i64(cpu_bndu[reg], cpu_A0);
+                /* bnd registers are now in-use */
+                gen_set_hflag(s, HF_MPX_IU_MASK);
+                break;
+            } else if (prefixes & PREFIX_REPNZ) {
+                /* bndcn */
+                if (reg >= 4
+                    || (prefixes & PREFIX_LOCK)
+                    || s->aflag == MO_16) {
+                    goto illegal_op;
+                }
+                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
+            } else if (prefixes & PREFIX_DATA) {
+                /* bndmov -- to reg/mem */
+                if (reg >= 4 || s->aflag == MO_16) {
+                    goto illegal_op;
+                }
+                if (mod == 3) {
+                    int reg2 = (modrm & 7) | REX_B(s);
+                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
+                        goto illegal_op;
+                    }
+                    if (s->flags & HF_MPX_IU_MASK) {
+                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
+                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
+                    }
+                } else {
+                    gen_lea_modrm(env, s, modrm);
+                    if (CODE64(s)) {
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                                            s->mem_index, MO_LEQ);
+                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 8);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                                            s->mem_index, MO_LEQ);
+                    } else {
+                        tcg_gen_qemu_st_i64(cpu_bndl[reg], cpu_A0,
+                                            s->mem_index, MO_LEUL);
+                        tcg_gen_addi_tl(cpu_A0, cpu_A0, 4);
+                        tcg_gen_qemu_st_i64(cpu_bndu[reg], cpu_A0,
+                                            s->mem_index, MO_LEUL);
+                    }
+                }
+            } else if (mod != 3) {
+                /* bndstx */
+                AddressParts a = gen_lea_modrm_0(env, s, modrm);
+                if (reg >= 4
+                    || (prefixes & PREFIX_LOCK)
+                    || s->aflag == MO_16
+                    || a.base < -1) {
+                    goto illegal_op;
+                }
+                if (a.base >= 0) {
+                    tcg_gen_addi_tl(cpu_A0, cpu_regs[a.base], a.disp);
+                } else {
+                    tcg_gen_movi_tl(cpu_A0, 0);
+                }
+                gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override);
+                if (a.index >= 0) {
+                    tcg_gen_mov_tl(cpu_T0, cpu_regs[a.index]);
+                } else {
+                    tcg_gen_movi_tl(cpu_T0, 0);
+                }
+                if (CODE64(s)) {
+                    gen_helper_bndstx64(cpu_env, cpu_A0, cpu_T0,
+                                        cpu_bndl[reg], cpu_bndu[reg]);
+                } else {
+                    gen_helper_bndstx32(cpu_env, cpu_A0, cpu_T0,
+                                        cpu_bndl[reg], cpu_bndu[reg]);
+                }
+            }
+        }
+        gen_nop_modrm(env, s, modrm);
+        break;
+    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_nop_modrm(env, s, modrm);
         break;
@@ -7640,18 +7756,18 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
-                    gen_op_mov_v_reg(ot, cpu_T[0], rm);
+                    gen_op_mov_v_reg(ot, cpu_T0, rm);
                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
-                                         cpu_T[0]);
+                                         cpu_T0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
-                    gen_helper_read_crN(cpu_T[0], cpu_env, tcg_const_i32(reg));
-                    gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                    gen_helper_read_crN(cpu_T0, cpu_env, tcg_const_i32(reg));
+                    gen_op_mov_reg_v(ot, rm, cpu_T0);
                 }
                 break;
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
         }
         break;
@@ -7672,19 +7788,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 ot = MO_64;
             else
                 ot = MO_32;
-            /* XXX: do it dynamically with CR4.DE bit */
-            if (reg == 4 || reg == 5 || reg >= 8)
+            if (reg >= 8) {
                 goto illegal_op;
+            }
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(ot, cpu_T[0], rm);
-                gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
+                gen_op_mov_v_reg(ot, cpu_T0, rm);
+                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
+                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T0);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
-                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
+                gen_helper_get_dr(cpu_T0, cpu_env, cpu_tmp2_i32);
+                gen_op_mov_reg_v(ot, rm, cpu_T0);
             }
         }
         break;
@@ -7714,83 +7832,195 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
-        mod = (modrm >> 6) & 3;
-        op = (modrm >> 3) & 7;
-        switch(op) {
-        case 0: /* fxsave */
-            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
-                (s->prefix & PREFIX_LOCK))
+        switch (modrm) {
+        CASE_MODRM_MEM_OP(0): /* fxsave */
+            if (!(s->cpuid_features & CPUID_FXSR)
+                || (prefixes & PREFIX_LOCK)) {
                 goto illegal_op;
+            }
             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0);
             break;
-        case 1: /* fxrstor */
-            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
-                (s->prefix & PREFIX_LOCK))
+
+        CASE_MODRM_MEM_OP(1): /* fxrstor */
+            if (!(s->cpuid_features & CPUID_FXSR)
+                || (prefixes & PREFIX_LOCK)) {
                 goto illegal_op;
+            }
             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
             gen_lea_modrm(env, s, modrm);
-            gen_update_cc_op(s);
-            gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0);
             break;
-        case 2: /* ldmxcsr */
-        case 3: /* stmxcsr */
+
+        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
+            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
+                goto illegal_op;
+            }
             if (s->flags & HF_TS_MASK) {
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
-                mod == 3)
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0, s->mem_index, MO_LEUL);
+            gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
+            break;
+
+        CASE_MODRM_MEM_OP(3): /* stmxcsr */
+            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
                 goto illegal_op;
+            }
+            if (s->flags & HF_TS_MASK) {
+                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
+                break;
+            }
             gen_lea_modrm(env, s, modrm);
-            if (op == 2) {
-                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
-                                    s->mem_index, MO_LEUL);
-                gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
-            } else {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
-                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
+            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, mxcsr));
+            gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
+            break;
+
+        CASE_MODRM_MEM_OP(4): /* xsave */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+                                | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
             }
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
             break;
-        case 5: /* lfence */
-        case 6: /* mfence */
-            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
+
+        CASE_MODRM_MEM_OP(5): /* xrstor */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+                                | PREFIX_REPZ | PREFIX_REPNZ))) {
                 goto illegal_op;
+            }
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                  cpu_regs[R_EDX]);
+            gen_helper_xrstor(cpu_env, cpu_A0, cpu_tmp1_i64);
+            /* XRSTOR is how MPX is enabled, which changes how
+               we translate.  Thus we need to end the TB.  */
+            gen_update_cc_op(s);
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob(s);
             break;
-        case 7: /* sfence / clflush */
-            if ((modrm & 0xc7) == 0xc0) {
-                /* sfence */
-                /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
-                if (!(s->cpuid_features & CPUID_SSE))
+
+        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
+            if (prefixes & PREFIX_LOCK) {
+                goto illegal_op;
+            }
+            if (prefixes & PREFIX_DATA) {
+                /* clwb */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
                     goto illegal_op;
+                }
+                gen_nop_modrm(env, s, modrm);
             } else {
-                /* clflush */
-                if (!(s->cpuid_features & CPUID_CLFLUSH))
+                /* xsaveopt */
+                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
+                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
                     goto illegal_op;
+                }
                 gen_lea_modrm(env, s, modrm);
+                tcg_gen_concat_tl_i64(cpu_tmp1_i64, cpu_regs[R_EAX],
+                                      cpu_regs[R_EDX]);
+                gen_helper_xsaveopt(cpu_env, cpu_A0, cpu_tmp1_i64);
+            }
+            break;
+
+        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
+            if (prefixes & PREFIX_LOCK) {
+                goto illegal_op;
+            }
+            if (prefixes & PREFIX_DATA) {
+                /* clflushopt */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
+                    goto illegal_op;
+                }
+            } else {
+                /* clflush */
+                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
+                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
+                    goto illegal_op;
+                }
+            }
+            gen_nop_modrm(env, s, modrm);
+            break;
+
+        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
+        case 0xc8 ... 0xc8: /* rdgsbase (f3 0f ae /1) */
+        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
+        case 0xd8 ... 0xd8: /* wrgsbase (f3 0f ae /3) */
+            if (CODE64(s)
+                && (prefixes & PREFIX_REPZ)
+                && !(prefixes & PREFIX_LOCK)
+                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
+                TCGv base, treg, src, dst;
+
+                /* Preserve hflags bits by testing CR4 at runtime.  */
+                tcg_gen_movi_i32(cpu_tmp2_i32, CR4_FSGSBASE_MASK);
+                gen_helper_cr4_testbit(cpu_env, cpu_tmp2_i32);
+
+                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
+                treg = cpu_regs[(modrm & 7) | REX_B(s)];
+
+                if (modrm & 0x10) {
+                    /* wr*base */
+                    dst = base, src = treg;
+                } else {
+                    /* rd*base */
+                    dst = treg, src = base;
+                }
+
+                if (s->dflag == MO_32) {
+                    tcg_gen_ext32u_tl(dst, src);
+                } else {
+                    tcg_gen_mov_tl(dst, src);
+                }
+                break;
+            }
+            goto unknown_op;
+
+        case 0xf8: /* sfence / pcommit */
+            if (prefixes & PREFIX_DATA) {
+                /* pcommit */
+                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
+                    || (prefixes & PREFIX_LOCK)) {
+                    goto illegal_op;
+                }
+                break;
+            }
+            /* fallthru */
+        case 0xf9 ... 0xff: /* sfence */
+        case 0xe8 ... 0xef: /* lfence */
+        case 0xf0 ... 0xf7: /* mfence */
+            if (!(s->cpuid_features & CPUID_SSE2)
+                || (prefixes & PREFIX_LOCK)) {
+                goto illegal_op;
             }
             break;
+
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
+
     case 0x10d: /* 3DNow! prefetch(w) */
         modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
-        /* ignore for now */
+        gen_nop_modrm(env, s, modrm);
         break;
     case 0x1aa: /* rsm */
         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
@@ -7818,8 +8048,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
 
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-        gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
-        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
+        gen_helper_popcnt(cpu_T0, cpu_env, cpu_T0, tcg_const_i32(ot));
+        gen_op_mov_reg_v(ot, reg, cpu_T0);
 
         set_cc_op(s, CC_OP_EFLAGS);
         break;
@@ -7837,7 +8067,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_sse(env, s, b, pc_start, rex_r);
         break;
     default:
-        goto illegal_op;
+        goto unknown_op;
     }
     /* lock generation */
     if (s->prefix & PREFIX_LOCK)
@@ -7847,11 +8077,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     if (s->prefix & PREFIX_LOCK)
         gen_helper_unlock();
     /* XXX: ensure that no lock was generated */
-    gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+    gen_illegal_opcode(s);
+    return s->pc;
+ unknown_op:
+    if (s->prefix & PREFIX_LOCK)
+        gen_helper_unlock();
+    /* XXX: ensure that no lock was generated */
+    gen_unknown_opcode(env, s);
     return s->pc;
 }
 
-void optimize_flags_init(void)
+void tcg_x86_init(void)
 {
     static const char reg_names[CPU_NB_REGS][4] = {
 #ifdef TARGET_X86_64
@@ -7882,38 +8118,66 @@ void optimize_flags_init(void)
         [R_ESP] = "esp",
 #endif
     };
+    static const char seg_base_names[6][8] = {
+        [R_CS] = "cs_base",
+        [R_DS] = "ds_base",
+        [R_ES] = "es_base",
+        [R_FS] = "fs_base",
+        [R_GS] = "gs_base",
+        [R_SS] = "ss_base",
+    };
+    static const char bnd_regl_names[4][8] = {
+        "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
+    };
+    static const char bnd_regu_names[4][8] = {
+        "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
+    };
     int i;
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
-    cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
+    cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
                                        offsetof(CPUX86State, cc_op), "cc_op");
-    cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
+    cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
                                     "cc_dst");
-    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
+    cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
                                     "cc_src");
-    cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
+    cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");
 
     for (i = 0; i < CPU_NB_REGS; ++i) {
-        cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
+        cpu_regs[i] = tcg_global_mem_new(cpu_env,
                                          offsetof(CPUX86State, regs[i]),
                                          reg_names[i]);
     }
+
+    for (i = 0; i < 6; ++i) {
+        cpu_seg_base[i]
+            = tcg_global_mem_new(cpu_env,
+                                 offsetof(CPUX86State, segs[i].base),
+                                 seg_base_names[i]);
+    }
+
+    for (i = 0; i < 4; ++i) {
+        cpu_bndl[i]
+            = tcg_global_mem_new_i64(cpu_env,
+                                     offsetof(CPUX86State, bnd_regs[i].lb),
+                                     bnd_regl_names[i]);
+        cpu_bndu[i]
+            = tcg_global_mem_new_i64(cpu_env,
+                                     offsetof(CPUX86State, bnd_regs[i].ub),
+                                     bnd_regu_names[i]);
+    }
+
+    helper_lock_init();
 }
 
-/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
-   basic block 'tb'. If search_pc is TRUE, also generate PC
-   information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(X86CPU *cpu,
-                                                  TranslationBlock *tb,
-                                                  bool search_pc)
+/* generate intermediate code for basic block 'tb'.  */
+void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
 {
+    X86CPU *cpu = x86_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
-    CPUX86State *env = &cpu->env;
     DisasContext dc1, *dc = &dc1;
     target_ulong pc_ptr;
-    CPUBreakpoint *bp;
-    int j, lj;
     uint64_t flags;
     target_ulong pc_start;
     target_ulong cs_base;
@@ -7943,13 +8207,14 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
     /* select memory access functions */
     dc->mem_index = 0;
     if (flags & HF_SOFTMMU_MASK) {
-        dc->mem_index = cpu_mmu_index(env);
+       dc->mem_index = cpu_mmu_index(env, false);
     }
     dc->cpuid_features = env->features[FEAT_1_EDX];
     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
+    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
@@ -7978,8 +8243,8 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
         printf("ERROR addseg\n");
 #endif
 
-    cpu_T[0] = tcg_temp_new();
-    cpu_T[1] = tcg_temp_new();
+    cpu_T0 = tcg_temp_new();
+    cpu_T1 = tcg_temp_new();
     cpu_A0 = tcg_temp_new();
 
     cpu_tmp0 = tcg_temp_new();
@@ -7993,40 +8258,37 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
-    lj = -1;
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
-    if (max_insns == 0)
+    if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
+    }
+    if (max_insns > TCG_MAX_INSNS) {
+        max_insns = TCG_MAX_INSNS;
+    }
 
     gen_tb_start(tb);
     for(;;) {
-        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
-            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
-                if (bp->pc == pc_ptr &&
-                    !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
-                    gen_debug(dc, pc_ptr - dc->cs_base);
-                    goto done_generating;
-                }
-            }
-        }
-        if (search_pc) {
-            j = tcg_op_buf_count();
-            if (lj < j) {
-                lj++;
-                while (lj < j)
-                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
-            }
-            tcg_ctx.gen_opc_pc[lj] = pc_ptr;
-            gen_opc_cc_op[lj] = dc->cc_op;
-            tcg_ctx.gen_opc_instr_start[lj] = 1;
-            tcg_ctx.gen_opc_icount[lj] = num_insns;
-        }
-        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+        tcg_gen_insn_start(pc_ptr, dc->cc_op);
+        num_insns++;
+
+        /* If RF is set, suppress an internally generated breakpoint.  */
+        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
+                                         tb->flags & HF_RF_MASK
+                                         ? BP_GDB : BP_ANY))) {
+            gen_debug(dc, pc_ptr - dc->cs_base);
+            /* The address covered by the breakpoint must be included in
+               [tb->pc, tb->pc + tb->size) in order to for it to be
+               properly cleared -- thus we increment the PC here so that
+               the logic setting tb->size below does the right thing.  */
+            pc_ptr += 1;
+            goto done_generating;
+        }
+        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
+        }
 
         pc_ptr = disas_insn(env, dc, pc_ptr);
-        num_insns++;
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
@@ -8074,14 +8336,6 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu,
 done_generating:
     gen_tb_end(tb, num_insns);
 
-    /* we don't forget to fill the last values */
-    if (search_pc) {
-        j = tcg_op_buf_count();
-        lj++;
-        while (lj <= j)
-            tcg_ctx.gen_opc_instr_start[lj++] = 0;
-    }
-
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         int disas_flags;
@@ -8098,42 +8352,16 @@ done_generating:
     }
 #endif
 
-    if (!search_pc) {
-        tb->size = pc_ptr - pc_start;
-        tb->icount = num_insns;
-    }
+    tb->size = pc_ptr - pc_start;
+    tb->icount = num_insns;
 }
 
-void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
+void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
+                          target_ulong *data)
 {
-    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, false);
-}
-
-void gen_intermediate_code_pc(CPUX86State *env, TranslationBlock *tb)
-{
-    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, true);
-}
-
-void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
-{
-    int cc_op;
-#ifdef DEBUG_DISAS
-    if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
-        int i;
-        qemu_log("RESTORE:\n");
-        for(i = 0;i <= pc_pos; i++) {
-            if (tcg_ctx.gen_opc_instr_start[i]) {
-                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i,
-                        tcg_ctx.gen_opc_pc[i]);
-            }
-        }
-        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                pc_pos, tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base,
-                (uint32_t)tb->cs_base);
-    }
-#endif
-    env->eip = tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base;
-    cc_op = gen_opc_cc_op[pc_pos];
-    if (cc_op != CC_OP_DYNAMIC)
+    int cc_op = data[1];
+    env->eip = data[0] - tb->cs_base;
+    if (cc_op != CC_OP_DYNAMIC) {
         env->cc_op = cc_op;
+    }
 }