1 /*
2  * Copyright 2013 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  *
14  * A code-rewriter that handles unaligned exception.
15  */
16
17 #include <linux/smp.h>
18 #include <linux/ptrace.h>
19 #include <linux/slab.h>
20 #include <linux/thread_info.h>
21 #include <linux/uaccess.h>
22 #include <linux/mman.h>
23 #include <linux/types.h>
24 #include <linux/err.h>
25 #include <linux/module.h>
26 #include <linux/compat.h>
27 #include <linux/prctl.h>
28 #include <linux/context_tracking.h>
29 #include <asm/cacheflush.h>
30 #include <asm/traps.h>
31 #include <asm/uaccess.h>
32 #include <asm/unaligned.h>
33 #include <arch/abi.h>
34 #include <arch/spr_def.h>
35 #include <arch/opcode.h>
36
37
38 /*
39  * This file handles the unaligned exception for tile-Gx. The tilepro's
40  * unaligned exception is handled in single_step.c instead.
41  */
42
43 int unaligned_printk;
44
45 static int __init setup_unaligned_printk(char *str)
46 {
47         long val;
48         if (kstrtol(str, 0, &val) != 0)
49                 return 0;
50         unaligned_printk = val;
51         pr_info("Printk for each unaligned data access is %s\n",
52                 unaligned_printk ? "enabled" : "disabled");
53         return 1;
54 }
55 __setup("unaligned_printk=", setup_unaligned_printk);
56
57 unsigned int unaligned_fixup_count;
58
59 #ifdef __tilegx__
60
61 /*
62  * Unaligned-data JIT fixup code fragment. The reserved space is 128 bytes.
63  * The first 64-bit word saves the fault PC address, the second word holds the
64  * faulting instruction bundle, followed by 14 JIT bundles.
65  */
66
67 struct unaligned_jit_fragment {
68         unsigned long       pc;
69         tilegx_bundle_bits  bundle;
70         tilegx_bundle_bits  insn[14];
71 };
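/*
 * Layout note: 8 (pc) + 8 (bundle) + 14 * 8 (insn) bytes account for the
 * 128 bytes reserved per fragment.  A fragment is built on the stack in
 * jit_bundle_gen() and then copied out to the per-thread JIT area at
 * thread_info->unalign_jit_base.
 */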
72
73 /*
74  * Check if a nop or fnop is at the bundle's X0 pipeline.
75  */
76
77 static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
78 {
79         return (((get_UnaryOpcodeExtension_X0(bundle) ==
80                   NOP_UNARY_OPCODE_X0) &&
81                  (get_RRROpcodeExtension_X0(bundle) ==
82                   UNARY_RRR_0_OPCODE_X0) &&
83                  (get_Opcode_X0(bundle) ==
84                   RRR_0_OPCODE_X0)) ||
85                 ((get_UnaryOpcodeExtension_X0(bundle) ==
86                   FNOP_UNARY_OPCODE_X0) &&
87                  (get_RRROpcodeExtension_X0(bundle) ==
88                   UNARY_RRR_0_OPCODE_X0) &&
89                  (get_Opcode_X0(bundle) ==
90                   RRR_0_OPCODE_X0)));
91 }
92
93 /*
94  * Check if a nop or fnop is at the bundle's X1 pipeline.
95  */
96
97 static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
98 {
99         return (((get_UnaryOpcodeExtension_X1(bundle) ==
100                   NOP_UNARY_OPCODE_X1) &&
101                  (get_RRROpcodeExtension_X1(bundle) ==
102                   UNARY_RRR_0_OPCODE_X1) &&
103                  (get_Opcode_X1(bundle) ==
104                   RRR_0_OPCODE_X1)) ||
105                 ((get_UnaryOpcodeExtension_X1(bundle) ==
106                   FNOP_UNARY_OPCODE_X1) &&
107                  (get_RRROpcodeExtension_X1(bundle) ==
108                   UNARY_RRR_0_OPCODE_X1) &&
109                  (get_Opcode_X1(bundle) ==
110                   RRR_0_OPCODE_X1)));
111 }
112
113 /*
114  * Check if a nop or fnop is at the bundle's Y0 pipeline.
115  */
116
117 static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
118 {
119         return (((get_UnaryOpcodeExtension_Y0(bundle) ==
120                   NOP_UNARY_OPCODE_Y0) &&
121                  (get_RRROpcodeExtension_Y0(bundle) ==
122                   UNARY_RRR_1_OPCODE_Y0) &&
123                  (get_Opcode_Y0(bundle) ==
124                   RRR_1_OPCODE_Y0)) ||
125                 ((get_UnaryOpcodeExtension_Y0(bundle) ==
126                   FNOP_UNARY_OPCODE_Y0) &&
127                  (get_RRROpcodeExtension_Y0(bundle) ==
128                   UNARY_RRR_1_OPCODE_Y0) &&
129                  (get_Opcode_Y0(bundle) ==
130                   RRR_1_OPCODE_Y0)));
131 }
132
133 /*
134  * Check if a nop or fnop is at the bundle's Y1 pipeline.
135  */
136
137 static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
138 {
139         return (((get_UnaryOpcodeExtension_Y1(bundle) ==
140                   NOP_UNARY_OPCODE_Y1) &&
141                  (get_RRROpcodeExtension_Y1(bundle) ==
142                   UNARY_RRR_1_OPCODE_Y1) &&
143                  (get_Opcode_Y1(bundle) ==
144                   RRR_1_OPCODE_Y1)) ||
145                 ((get_UnaryOpcodeExtension_Y1(bundle) ==
146                   FNOP_UNARY_OPCODE_Y1) &&
147                  (get_RRROpcodeExtension_Y1(bundle) ==
148                   UNARY_RRR_1_OPCODE_Y1) &&
149                  (get_Opcode_Y1(bundle) ==
150                   RRR_1_OPCODE_Y1)));
151 }
152
153 /*
154  * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
155  */
156
157 static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
158 {
159         return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
160 }
161
162 /*
163  * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
164  */
165
166 static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
167 {
168         return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
169 }
170
171 /*
172  * Find the destination and source registers of the faulting unaligned access
173  * instruction at X1 or Y2. Also, allocate up to 3 scratch registers (clob1,
174  * clob2 and clob3), which are guaranteed to differ from any register used in
175  * the fault bundle. r_alias returns whether any instruction other than the
176  * unaligned load/store shares a register with ra, rb or rd.
177  */
178
179 static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
180                       uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
181                       uint64_t *clob3, bool *r_alias)
182 {
183         int i;
184         uint64_t reg;
185         uint64_t reg_map = 0, alias_reg_map = 0, map;
186         bool alias = false;
187
188         /*
189          * Parse the fault bundle, find the potentially used registers and
190          * mark the corresponding bits in reg_map and alias_reg_map. These 2
191          * bit maps are used to find the scratch registers and determine if
192          * there is register aliasing.
193          */
194         if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
195
196                 reg = get_SrcA_Y2(bundle);
197                 reg_map |= 1ULL << reg;
198                 *ra = reg;
199                 reg = get_SrcBDest_Y2(bundle);
200                 reg_map |= 1ULL << reg;
201
202                 if (rd) {
203                         /* Load. */
204                         *rd = reg;
205                         alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
206                 } else {
207                         /* Store. */
208                         *rb = reg;
209                         alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
210                 }
211
212                 if (!is_bundle_y1_nop(bundle)) {
213                         reg = get_SrcA_Y1(bundle);
214                         reg_map |= (1ULL << reg);
215                         map = (1ULL << reg);
216
217                         reg = get_SrcB_Y1(bundle);
218                         reg_map |= (1ULL << reg);
219                         map |= (1ULL << reg);
220
221                         reg = get_Dest_Y1(bundle);
222                         reg_map |= (1ULL << reg);
223                         map |= (1ULL << reg);
224
225                         if (map & alias_reg_map)
226                                 alias = true;
227                 }
228
229                 if (!is_bundle_y0_nop(bundle)) {
230                         reg = get_SrcA_Y0(bundle);
231                         reg_map |= (1ULL << reg);
232                         map = (1ULL << reg);
233
234                         reg = get_SrcB_Y0(bundle);
235                         reg_map |= (1ULL << reg);
236                         map |= (1ULL << reg);
237
238                         reg = get_Dest_Y0(bundle);
239                         reg_map |= (1ULL << reg);
240                         map |= (1ULL << reg);
241
242                         if (map & alias_reg_map)
243                                 alias = true;
244                 }
245         } else  { /* X Mode Bundle. */
246
247                 reg = get_SrcA_X1(bundle);
248                 reg_map |= (1ULL << reg);
249                 *ra = reg;
250                 if (rd) {
251                         /* Load. */
252                         reg = get_Dest_X1(bundle);
253                         reg_map |= (1ULL << reg);
254                         *rd = reg;
255                         alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
256                 } else {
257                         /* Store. */
258                         reg = get_SrcB_X1(bundle);
259                         reg_map |= (1ULL << reg);
260                         *rb = reg;
261                         alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
262                 }
263
264                 if (!is_bundle_x0_nop(bundle)) {
265                         reg = get_SrcA_X0(bundle);
266                         reg_map |= (1ULL << reg);
267                         map = (1ULL << reg);
268
269                         reg = get_SrcB_X0(bundle);
270                         reg_map |= (1ULL << reg);
271                         map |= (1ULL << reg);
272
273                         reg = get_Dest_X0(bundle);
274                         reg_map |= (1ULL << reg);
275                         map |= (1ULL << reg);
276
277                         if (map & alias_reg_map)
278                                 alias = true;
279                 }
280         }
281
282         /*
283          * "alias" indicates if the unalign access registers have collision
284          * with others in the same bundle. We jsut simply test all register
285          * operands case (RRR), ignored the case with immidate. If a bundle
286          * has no register alias, we may do fixup in a simple or fast manner.
287          * So if an immidata field happens to hit with a register, we may end
288          * up fall back to the generic handling.
289          */
290
291         *r_alias = alias;
292
293         /* Flip bits on reg_map. */
294         reg_map ^= -1ULL;
295
296         /* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
297         for (i = 0; i < TREG_SP; i++) {
298                 if (reg_map & (0x1ULL << i)) {
299                         if (*clob1 == -1) {
300                                 *clob1 = i;
301                         } else if (*clob2 == -1) {
302                                 *clob2 = i;
303                         } else if (*clob3 == -1) {
304                                 *clob3 = i;
305                                 return;
306                         }
307                 }
308         }
309 }
310
311 /*
312  * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
313  * is unexpected.
314  */
315
316 static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
317                        uint64_t clob1, uint64_t clob2,  uint64_t clob3)
318 {
319         bool unexpected = false;
320         if ((ra >= 56) && (ra != TREG_ZERO))
321                 unexpected = true;
322
323         if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
324                 unexpected = true;
325
326         if (rd != -1) {
327                 if ((rd >= 56) && (rd != TREG_ZERO))
328                         unexpected = true;
329         } else {
330                 if ((rb >= 56) && (rb != TREG_ZERO))
331                         unexpected = true;
332         }
333         return unexpected;
334 }
335
336
337 #define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
338 #define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
339 #define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
340 #define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
341 #define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
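/*
 * These masks pick out one pipeline's slot from a 64-bit bundle: X0 covers
 * bits 0..30 and X1 bits 31..61; Y0 covers bits 0..19 plus 27..30, Y1 is the
 * same pattern shifted up by 31, and Y2 covers bits 20..26 plus 51..57.
 */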
342
343 #ifdef __LITTLE_ENDIAN
344 #define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
345 #else
346 #define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
347 #endif /* __LITTLE_ENDIAN */
348
349 /*
350  * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section.
351  * The corresponding static function jit_x#_###() generates a partial or
352  * whole bundle based on the template and the given arguments.
353  */
354
355 #define __JIT_CODE(_X_)                                         \
356         asm (".pushsection .rodata.unalign_data, \"a\"\n"       \
357              _X_"\n"                                            \
358              ".popsection\n")
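/*
 * Each jit_*() helper below follows the same pattern: take the template
 * bundle emitted by __JIT_CODE(), byte-swap it on big-endian hosts
 * (GX_INSN_BSWAP), mask off everything but the relevant pipeline slot, and
 * OR in the caller-supplied operand fields via the create_*() encoders.
 * For example, jit_x1_ldna(rd, ra) yields just the X1 slot for
 * "ldna rd, ra", which can later be OR'ed with an X0 slot (e.g.
 * jit_x0_fnop()) to form a complete bundle.
 */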
359
360 __JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
361 static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
362 {
363         extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
364         return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
365                 create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
366 }
367
368 __JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
369 static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
370 {
371         extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
372         return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
373                 create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
374 }
375
376 __JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
377 static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
378 {
379         extern  tilegx_bundle_bits __unalign_jit_x0_addi;
380         return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
381                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
382                 create_Imm8_X0(imm8);
383 }
384
385 __JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
386 static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
387 {
388         extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
389         return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
390                 create_Dest_X1(rd) | create_SrcA_X1(ra);
391 }
392
393 __JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
394 static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
395 {
396         extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
397         return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
398                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
399                 create_SrcB_X0(rb);
400 }
401
402 __JIT_CODE("__unalign_jit_x1_iret:   {iret}");
403 static tilegx_bundle_bits  jit_x1_iret(void)
404 {
405         extern  tilegx_bundle_bits __unalign_jit_x1_iret;
406         return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
407 }
408
409 __JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
410 static tilegx_bundle_bits  jit_x0_fnop(void)
411 {
412         extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
413         return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
414 }
415
416 static tilegx_bundle_bits  jit_x1_fnop(void)
417 {
418         extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
419         return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
420 }
421
422 __JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
423 static tilegx_bundle_bits  jit_y2_dummy(void)
424 {
425         extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
426         return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
427 }
428
429 static tilegx_bundle_bits  jit_y1_fnop(void)
430 {
431         extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
432         return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
433 }
434
435 __JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
436 static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
437 {
438         extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
439         return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
440                 (~create_SrcA_X1(-1)) &
441                 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
442                 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
443 }
444
445 __JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
446 static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
447 {
448         extern  tilegx_bundle_bits __unalign_jit_x1_st;
449         return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
450                 create_SrcA_X1(ra) | create_SrcB_X1(rb);
451 }
452
453 __JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
454 static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
455 {
456         extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
457         return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
458                 (~create_SrcA_X1(-1)) &
459                 GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
460                 create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
461 }
462
463 __JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
464 static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
465 {
466         extern  tilegx_bundle_bits __unalign_jit_x1_ld;
467         return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
468                 create_Dest_X1(rd) | create_SrcA_X1(ra);
469 }
470
471 __JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
472 static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
473 {
474         extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
475         return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
476                 (~create_Dest_X1(-1)) &
477                 GX_INSN_X1_MASK) | create_Dest_X1(rd) |
478                 create_SrcA_X1(ra) | create_Imm8_X1(imm8);
479 }
480
481 __JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
482 static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
483 {
484         extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
485         return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
486                 GX_INSN_X0_MASK) |
487                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
488                 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
489 }
490
491 __JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
492 static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
493 {
494         extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
495         return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
496                 GX_INSN_X0_MASK) |
497                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
498                 create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
499 }
500
501 __JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
502 static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
503 {
504         extern  tilegx_bundle_bits __unalign_jit_x1_addi;
505         return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
506                 create_Dest_X1(rd) | create_SrcA_X1(ra) |
507                 create_Imm8_X1(imm8);
508 }
509
510 __JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
511 static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
512 {
513         extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
514         return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
515                 GX_INSN_X0_MASK) |
516                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
517                 create_ShAmt_X0(imm6);
518 }
519
520 __JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
521 static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
522 {
523         extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
524         return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
525                 GX_INSN_X0_MASK) |
526                 create_Dest_X0(rd) | create_SrcA_X0(ra) |
527                 create_ShAmt_X0(imm6);
528 }
529
530 __JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
531 static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
532 {
533         extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
534         return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
535                 GX_INSN_X1_MASK) |
536                 create_SrcA_X1(ra) | create_BrOff_X1(broff);
537 }
538
539 #undef __JIT_CODE
540
541 /*
542  * This function generates the unaligned-access fixup JIT.
543  *
544  * We first find the unaligned load/store instruction's destination and
545  * source registers (ra, rb and rd) plus 3 scratch registers by calling
546  * find_regs(...). The 3 scratch clobbers must not alias any register used
547  * in the fault bundle. Then we analyze the fault bundle to determine whether
548  * it's a load or store, the operand width, and any branch or address increment.
549  * Finally the generated JIT is copied into the JIT code area in user space.
550  */
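/*
 * As a rough illustration of the simple path (8-byte load, rd != ra, no
 * aliasing, no branch/link/add, second bundle not needed), the generated
 * fragment looks like:
 *
 *   { addi sp, sp, -16       ; fnop }              -- spill space
 *   { addi clob1, ra, 7      ; st sp, clob1 }      -- save clob1
 *   { fnop                   ; ldna rd, ra }       -- aligned low word
 *   { fnop                   ; ldna clob1, clob1 } -- aligned high word
 *   { dblalign rd, clob1, ra ; ld_add clob1, sp, 16 }
 *   { fnop                   ; iret }
 *
 * Narrower or more complicated accesses add sign/zero extension, the rest
 * of the original bundle, or fall through to the generic scheme below.
 */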
551
552 static
553 void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
554                     int align_ctl)
555 {
556         struct thread_info *info = current_thread_info();
557         struct unaligned_jit_fragment frag;
558         struct unaligned_jit_fragment *jit_code_area;
559         tilegx_bundle_bits bundle_2 = 0;
560         /* If bundle_2_enable is false, bundle_2 is a fnop/nop operation. */
561         bool     bundle_2_enable = true;
562         uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
563         /*
564          * Indicates whether the unaligned access
565          * instruction's registers collide with
566          * others in the same bundle.
567          */
568         bool     alias = false;
569         bool     load_n_store = true;
570         bool     load_store_signed = false;
571         unsigned int  load_store_size = 8;
572         bool     y1_br = false;  /* True, for a branch in same bundle at Y1.*/
573         int      y1_br_reg = 0;
574         /* True for link operation. i.e. jalr or lnk at Y1 */
575         bool     y1_lr = false;
576         int      y1_lr_reg = 0;
577         bool     x1_add = false;/* True, for load/store ADD instruction at X1*/
578         int      x1_add_imm8 = 0;
579         bool     unexpected = false;
580         int      n = 0, k;
581
582         jit_code_area =
583                 (struct unaligned_jit_fragment *)(info->unalign_jit_base);
584
585         memset((void *)&frag, 0, sizeof(frag));
586
587         /* 0: X mode, Otherwise: Y mode. */
588         if (bundle & TILEGX_BUNDLE_MODE_MASK) {
589                 unsigned int mod, opcode;
590
591                 if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
592                     get_RRROpcodeExtension_Y1(bundle) ==
593                     UNARY_RRR_1_OPCODE_Y1) {
594
595                         opcode = get_UnaryOpcodeExtension_Y1(bundle);
596
597                         /*
598                          * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
599                          * pipeline.
600                          */
601                         switch (opcode) {
602                         case JALR_UNARY_OPCODE_Y1:
603                         case JALRP_UNARY_OPCODE_Y1:
604                                 y1_lr = true;
605                                 y1_lr_reg = 55; /* Link register. */
606                                 /* FALLTHROUGH */
607                         case JR_UNARY_OPCODE_Y1:
608                         case JRP_UNARY_OPCODE_Y1:
609                                 y1_br = true;
610                                 y1_br_reg = get_SrcA_Y1(bundle);
611                                 break;
612                         case LNK_UNARY_OPCODE_Y1:
613                                 /* "lnk" at Y1 pipeline. */
614                                 y1_lr = true;
615                                 y1_lr_reg = get_Dest_Y1(bundle);
616                                 break;
617                         }
618                 }
619
620                 opcode = get_Opcode_Y2(bundle);
621                 mod = get_Mode(bundle);
622
623                 /*
624                  *  bundle_2 is bundle after making Y2 as a dummy operation
625                  *  - ld zero, sp
626                  */
627                 bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
628
629                 /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
630                 if (y1_br || y1_lr) {
631                         bundle_2 &= ~(GX_INSN_Y1_MASK);
632                         bundle_2 |= jit_y1_fnop();
633                 }
634
635                 if (is_y0_y1_nop(bundle_2))
636                         bundle_2_enable = false;
637
638                 if (mod == MODE_OPCODE_YC2) {
639                         /* Store. */
640                         load_n_store = false;
641                         load_store_size = 1 << opcode;
642                         load_store_signed = false;
643                         find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
644                                   &clob3, &alias);
645                         if (load_store_size > 8)
646                                 unexpected = true;
647                 } else {
648                         /* Load. */
649                         load_n_store = true;
650                         if (mod == MODE_OPCODE_YB2) {
651                                 switch (opcode) {
652                                 case LD_OPCODE_Y2:
653                                         load_store_signed = false;
654                                         load_store_size = 8;
655                                         break;
656                                 case LD4S_OPCODE_Y2:
657                                         load_store_signed = true;
658                                         load_store_size = 4;
659                                         break;
660                                 case LD4U_OPCODE_Y2:
661                                         load_store_signed = false;
662                                         load_store_size = 4;
663                                         break;
664                                 default:
665                                         unexpected = true;
666                                 }
667                         } else if (mod == MODE_OPCODE_YA2) {
668                                 if (opcode == LD2S_OPCODE_Y2) {
669                                         load_store_signed = true;
670                                         load_store_size = 2;
671                                 } else if (opcode == LD2U_OPCODE_Y2) {
672                                         load_store_signed = false;
673                                         load_store_size = 2;
674                                 } else
675                                         unexpected = true;
676                         } else
677                                 unexpected = true;
678                         find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
679                                   &clob3, &alias);
680                 }
681         } else {
682                 unsigned int opcode;
683
684                 /* bundle_2 is bundle after making X1 as "fnop". */
685                 bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
686
687                 if (is_x0_x1_nop(bundle_2))
688                         bundle_2_enable = false;
689
690                 if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
691                         opcode = get_UnaryOpcodeExtension_X1(bundle);
692
693                         if (get_RRROpcodeExtension_X1(bundle) ==
694                             UNARY_RRR_0_OPCODE_X1) {
695                                 load_n_store = true;
696                                 find_regs(bundle, &rd, &ra, &rb, &clob1,
697                                           &clob2, &clob3, &alias);
698
699                                 switch (opcode) {
700                                 case LD_UNARY_OPCODE_X1:
701                                         load_store_signed = false;
702                                         load_store_size = 8;
703                                         break;
704                                 case LD4S_UNARY_OPCODE_X1:
705                                         load_store_signed = true;
706                                         /* FALLTHROUGH */
707                                 case LD4U_UNARY_OPCODE_X1:
708                                         load_store_size = 4;
709                                         break;
710
711                                 case LD2S_UNARY_OPCODE_X1:
712                                         load_store_signed = true;
713                                         /* FALLTHROUGH */
714                                 case LD2U_UNARY_OPCODE_X1:
715                                         load_store_size = 2;
716                                         break;
717                                 default:
718                                         unexpected = true;
719                                 }
720                         } else {
721                                 load_n_store = false;
722                                 load_store_signed = false;
723                                 find_regs(bundle, 0, &ra, &rb,
724                                           &clob1, &clob2, &clob3,
725                                           &alias);
726
727                                 opcode = get_RRROpcodeExtension_X1(bundle);
728                                 switch (opcode) {
729                                 case ST_RRR_0_OPCODE_X1:
730                                         load_store_size = 8;
731                                         break;
732                                 case ST4_RRR_0_OPCODE_X1:
733                                         load_store_size = 4;
734                                         break;
735                                 case ST2_RRR_0_OPCODE_X1:
736                                         load_store_size = 2;
737                                         break;
738                                 default:
739                                         unexpected = true;
740                                 }
741                         }
742                 } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
743                         load_n_store = true;
744                         opcode = get_Imm8OpcodeExtension_X1(bundle);
745                         switch (opcode) {
746                         case LD_ADD_IMM8_OPCODE_X1:
747                                 load_store_size = 8;
748                                 break;
749
750                         case LD4S_ADD_IMM8_OPCODE_X1:
751                                 load_store_signed = true;
752                                 /* FALLTHROUGH */
753                         case LD4U_ADD_IMM8_OPCODE_X1:
754                                 load_store_size = 4;
755                                 break;
756
757                         case LD2S_ADD_IMM8_OPCODE_X1:
758                                 load_store_signed = true;
759                                 /* FALLTHROUGH */
760                         case LD2U_ADD_IMM8_OPCODE_X1:
761                                 load_store_size = 2;
762                                 break;
763
764                         case ST_ADD_IMM8_OPCODE_X1:
765                                 load_n_store = false;
766                                 load_store_size = 8;
767                                 break;
768                         case ST4_ADD_IMM8_OPCODE_X1:
769                                 load_n_store = false;
770                                 load_store_size = 4;
771                                 break;
772                         case ST2_ADD_IMM8_OPCODE_X1:
773                                 load_n_store = false;
774                                 load_store_size = 2;
775                                 break;
776                         default:
777                                 unexpected = true;
778                         }
779
780                         if (!unexpected) {
781                                 x1_add = true;
782                                 if (load_n_store)
783                                         x1_add_imm8 = get_Imm8_X1(bundle);
784                                 else
785                                         x1_add_imm8 = get_Dest_Imm8_X1(bundle);
786                         }
787
788                         find_regs(bundle, load_n_store ? (&rd) : NULL,
789                                   &ra, &rb, &clob1, &clob2, &clob3, &alias);
790                 } else
791                         unexpected = true;
792         }
793
794         /*
795          * Some sanity check for register numbers extracted from fault bundle.
796          */
797         if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
798                 unexpected = true;
799
800         /* Give warning if register ra has an aligned address. */
801         if (!unexpected)
802                 WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
803
804
805         /*
806          * If the fault came from kernel space, we only need to take care of
807          * the unaligned "get_user/put_user" macros defined in "uaccess.h".
808          * Basically, we handle bundles like this:
809          * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
810          * (refer to "arch/tile/include/asm/uaccess.h" for details).
811          * For either load or store, the byte-wise operation is performed by
812          * calling get_user() or put_user(). If the macro returns a non-zero
813          * value, that value is set in rx; otherwise rx is set to zero. Finally
814          * make pc point to the next bundle and return.
815          */
816
817         if (EX1_PL(regs->ex1) != USER_PL) {
818
819                 unsigned long rx = 0;
820                 unsigned long x = 0, ret = 0;
821
822                 if (y1_br || y1_lr || x1_add ||
823                     (load_store_signed !=
824                      (load_n_store && load_store_size == 4))) {
825                         /* Branch, link, load/store add or wrong sign-ext: not handled here. */
826                         unexpected = true;
827                 } else if (!unexpected) {
828                         if (bundle & TILEGX_BUNDLE_MODE_MASK) {
829                                 /*
830                                  * Fault bundle is Y mode.
831                                  * Check if Y1 and Y0 have the form
832                                  * { movei rx, 0; nop/fnop }; if so,
833                                  * find rx.
834                                  */
835
836                                 if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
837                                     && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
838                                     (get_Imm8_Y1(bundle) == 0) &&
839                                     is_bundle_y0_nop(bundle)) {
840                                         rx = get_Dest_Y1(bundle);
841                                 } else if ((get_Opcode_Y0(bundle) ==
842                                             ADDI_OPCODE_Y0) &&
843                                            (get_SrcA_Y0(bundle) == TREG_ZERO) &&
844                                            (get_Imm8_Y0(bundle) == 0) &&
845                                            is_bundle_y1_nop(bundle)) {
846                                         rx = get_Dest_Y0(bundle);
847                                 } else {
848                                         unexpected = true;
849                                 }
850                         } else {
851                                 /*
852                                  * Fault bundle is X mode.
853                                  * Check if X0 is 'movei rx, 0';
854                                  * if so, find rx.
855                                  */
856
857                                 if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
858                                     && (get_Imm8OpcodeExtension_X0(bundle) ==
859                                         ADDI_IMM8_OPCODE_X0) &&
860                                     (get_SrcA_X0(bundle) == TREG_ZERO) &&
861                                     (get_Imm8_X0(bundle) == 0)) {
862                                         rx = get_Dest_X0(bundle);
863                                 } else {
864                                         unexpected = true;
865                                 }
866                         }
867
868                         /* rx should be less than 56. */
869                         if (!unexpected && (rx >= 56))
870                                 unexpected = true;
871                 }
872
873                 if (!search_exception_tables(regs->pc)) {
874                         /* No fixup in the exception tables for the pc. */
875                         unexpected = true;
876                 }
877
878                 if (unexpected) {
879                         /* Unexpected unalign kernel fault. */
880                         struct task_struct *tsk = validate_current();
881
882                         bust_spinlocks(1);
883
884                         show_regs(regs);
885
886                         if (unlikely(tsk->pid < 2)) {
887                                 panic("Kernel unalign fault running %s!",
888                                       tsk->pid ? "init" : "the idle task");
889                         }
890 #ifdef SUPPORT_DIE
891                         die("Oops", regs);
892 #endif
893                         bust_spinlocks(1);
894
895                         do_group_exit(SIGKILL);
896
897                 } else {
898                         unsigned long i, b = 0;
899                         unsigned char *ptr =
900                                 (unsigned char *)regs->regs[ra];
901                         if (load_n_store) {
902                                 /* handle get_user(x, ptr) */
903                                 for (i = 0; i < load_store_size; i++) {
904                                         ret = get_user(b, ptr++);
905                                         if (!ret) {
906                                                 /* Success! update x. */
907 #ifdef __LITTLE_ENDIAN
908                                                 x |= (b << (8 * i));
909 #else
910                                                 x <<= 8;
911                                                 x |= b;
912 #endif /* __LITTLE_ENDIAN */
913                                         } else {
914                                                 x = 0;
915                                                 break;
916                                         }
917                                 }
918
919                                 /* Sign-extend 4-byte loads. */
920                                 if (load_store_size == 4)
921                                         x = (long)(int)x;
922
923                                 /* Set register rd. */
924                                 regs->regs[rd] = x;
925
926                                 /* Set register rx. */
927                                 regs->regs[rx] = ret;
928
929                                 /* Bump pc. */
930                                 regs->pc += 8;
931
932                         } else {
933                                 /* Handle put_user(x, ptr) */
934                                 x = regs->regs[rb];
935 #ifdef __LITTLE_ENDIAN
936                                 b = x;
937 #else
938                                 /*
939                                  * Swap x so that it is stored from low
940                                  * to high memory, the same as in the
941                                  * little-endian case.
942                                  */
943                                 switch (load_store_size) {
944                                 case 8:
945                                         b = swab64(x);
946                                         break;
947                                 case 4:
948                                         b = swab32(x);
949                                         break;
950                                 case 2:
951                                         b = swab16(x);
952                                         break;
953                                 }
954 #endif /* __LITTLE_ENDIAN */
955                                 for (i = 0; i < load_store_size; i++) {
956                                         ret = put_user(b, ptr++);
957                                         if (ret)
958                                                 break;
959                                         /* Success! shift 1 byte. */
960                                         b >>= 8;
961                                 }
962                                 /* Set register rx. */
963                                 regs->regs[rx] = ret;
964
965                                 /* Bump pc. */
966                                 regs->pc += 8;
967                         }
968                 }
969
970                 unaligned_fixup_count++;
971
972                 if (unaligned_printk) {
973                         pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
974                                 current->comm, current->pid, regs->regs[ra]);
975                 }
976
977                 /* Done! Return to the exception handler. */
978                 return;
979         }
980
981         if ((align_ctl == 0) || unexpected) {
982                 siginfo_t info = {
983                         .si_signo = SIGBUS,
984                         .si_code = BUS_ADRALN,
985                         .si_addr = (unsigned char __user *)0
986                 };
987                 if (unaligned_printk)
988                         pr_info("Unalign bundle: unexp @%llx, %llx\n",
989                                 (unsigned long long)regs->pc,
990                                 (unsigned long long)bundle);
991
992                 if (ra < 56) {
993                         unsigned long uaa = (unsigned long)regs->regs[ra];
994                         /* Set bus Address. */
995                         info.si_addr = (unsigned char __user *)uaa;
996                 }
997
998                 unaligned_fixup_count++;
999
1000                 trace_unhandled_signal("unaligned fixup trap", regs,
1001                                        (unsigned long)info.si_addr, SIGBUS);
1002                 force_sig_info(info.si_signo, &info, current);
1003                 return;
1004         }
1005
1006 #ifdef __LITTLE_ENDIAN
1007 #define UA_FIXUP_ADDR_DELTA          1
1008 #define UA_FIXUP_BFEXT_START(_B_)    0
1009 #define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1010 #else /* __BIG_ENDIAN */
1011 #define UA_FIXUP_ADDR_DELTA          -1
1012 #define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1013 #define UA_FIXUP_BFEXT_END(_B_)      63
1014 #endif /* __LITTLE_ENDIAN */
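/*
 * For example, for an unsigned 4-byte access: on little-endian the generated
 * byte-store loop (st1_add) walks the address upward (delta +1) and a load
 * result is extracted from bits [0, 31] (bfextu start 0, end 31); on
 * big-endian the loop walks downward (delta -1) and bits [32, 63] are
 * extracted instead (start 32, end 63).
 */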
1015
1016
1017
1018         if ((ra != rb) && (rd != TREG_SP) && !alias &&
1019             !y1_br && !y1_lr && !x1_add) {
1020                 /*
1021                  * Simple case: ra != rb, no register alias found,
1022                  * and no branch or link. This will be the majority of cases.
1023                  * We can do a little better for this simple case than with the
1024                  * generic scheme below.
1025                  */
1026                 if (!load_n_store) {
1027                         /*
1028                          * Simple store: ra != rb, no need for scratch register.
1029                          * Just store and rotate right bytewise.
1030                          */
1031 #ifdef __BIG_ENDIAN
1032                         frag.insn[n++] =
1033                                 jit_x0_addi(ra, ra, load_store_size - 1) |
1034                                 jit_x1_fnop();
1035 #endif /* __BIG_ENDIAN */
1036                         for (k = 0; k < load_store_size; k++) {
1037                                 /* Store a byte. */
1038                                 frag.insn[n++] =
1039                                         jit_x0_rotli(rb, rb, 56) |
1040                                         jit_x1_st1_add(ra, rb,
1041                                                        UA_FIXUP_ADDR_DELTA);
1042                         }
1043 #ifdef __BIG_ENDIAN
1044                         frag.insn[n] = jit_x1_addi(ra, ra, 1);
1045 #else
1046                         frag.insn[n] = jit_x1_addi(ra, ra,
1047                                                    -1 * load_store_size);
1048 #endif /* __LITTLE_ENDIAN */
1049
1050                         if (load_store_size == 8) {
1051                                 frag.insn[n] |= jit_x0_fnop();
1052                         } else if (load_store_size == 4) {
1053                                 frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1054                         } else { /* = 2 */
1055                                 frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1056                         }
1057                         n++;
1058                         if (bundle_2_enable)
1059                                 frag.insn[n++] = bundle_2;
1060                         frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1061                 } else {
1062                         if (rd == ra) {
1063                                 /* Use two clobber registers: clob1/2. */
1064                                 frag.insn[n++] =
1065                                         jit_x0_addi(TREG_SP, TREG_SP, -16) |
1066                                         jit_x1_fnop();
1067                                 frag.insn[n++] =
1068                                         jit_x0_addi(clob1, ra, 7) |
1069                                         jit_x1_st_add(TREG_SP, clob1, -8);
1070                                 frag.insn[n++] =
1071                                         jit_x0_addi(clob2, ra, 0) |
1072                                         jit_x1_st(TREG_SP, clob2);
1073                                 frag.insn[n++] =
1074                                         jit_x0_fnop() |
1075                                         jit_x1_ldna(rd, ra);
1076                                 frag.insn[n++] =
1077                                         jit_x0_fnop() |
1078                                         jit_x1_ldna(clob1, clob1);
1079                                 /*
1080                                  * Note: we must make sure that rd is not
1081                                  * sp. Recover clob1/2 from the stack.
1082                                  */
1083                                 frag.insn[n++] =
1084                                         jit_x0_dblalign(rd, clob1, clob2) |
1085                                         jit_x1_ld_add(clob2, TREG_SP, 8);
1086                                 frag.insn[n++] =
1087                                         jit_x0_fnop() |
1088                                         jit_x1_ld_add(clob1, TREG_SP, 16);
1089                         } else {
1090                                 /* Use one clobber register: clob1 only. */
1091                                 frag.insn[n++] =
1092                                         jit_x0_addi(TREG_SP, TREG_SP, -16) |
1093                                         jit_x1_fnop();
1094                                 frag.insn[n++] =
1095                                         jit_x0_addi(clob1, ra, 7) |
1096                                         jit_x1_st(TREG_SP, clob1);
1097                                 frag.insn[n++] =
1098                                         jit_x0_fnop() |
1099                                         jit_x1_ldna(rd, ra);
1100                                 frag.insn[n++] =
1101                                         jit_x0_fnop() |
1102                                         jit_x1_ldna(clob1, clob1);
1103                                 /*
1104                                  * Note: we must make sure that rd is not
1105                                  * sp. Recover clob1 from the stack.
1106                                  */
1107                                 frag.insn[n++] =
1108                                         jit_x0_dblalign(rd, clob1, ra) |
1109                                         jit_x1_ld_add(clob1, TREG_SP, 16);
1110                         }
1111
1112                         if (bundle_2_enable)
1113                                 frag.insn[n++] = bundle_2;
1114                         /*
1115                          * For a non-8-byte load, extract the corresponding
1116                          * bytes and sign-extend as needed.
1117                          */
1118                         if (load_store_size == 4) {
1119                                 if (load_store_signed)
1120                                         frag.insn[n++] =
1121                                                 jit_x0_bfexts(
1122                                                         rd, rd,
1123                                                         UA_FIXUP_BFEXT_START(4),
1124                                                         UA_FIXUP_BFEXT_END(4)) |
1125                                                 jit_x1_fnop();
1126                                 else
1127                                         frag.insn[n++] =
1128                                                 jit_x0_bfextu(
1129                                                         rd, rd,
1130                                                         UA_FIXUP_BFEXT_START(4),
1131                                                         UA_FIXUP_BFEXT_END(4)) |
1132                                                 jit_x1_fnop();
1133                         } else if (load_store_size == 2) {
1134                                 if (load_store_signed)
1135                                         frag.insn[n++] =
1136                                                 jit_x0_bfexts(
1137                                                         rd, rd,
1138                                                         UA_FIXUP_BFEXT_START(2),
1139                                                         UA_FIXUP_BFEXT_END(2)) |
1140                                                 jit_x1_fnop();
1141                                 else
1142                                         frag.insn[n++] =
1143                                                 jit_x0_bfextu(
1144                                                         rd, rd,
1145                                                         UA_FIXUP_BFEXT_START(2),
1146                                                         UA_FIXUP_BFEXT_END(2)) |
1147                                                 jit_x1_fnop();
1148                         }
1149
1150                         frag.insn[n++] =
1151                                 jit_x0_fnop()  |
1152                                 jit_x1_iret();
1153                 }
1154         } else if (!load_n_store) {
1155
1156                 /*
1157                  * Generic memory store cases: use 3 clobber registers.
1158                  *
1159                  * Allocate space for saving clob2, clob1 and clob3 on the user's
1160                  * stack. Register clob3 points to where clob2 is saved, followed
1161                  * by clob1 and clob3 from high to low memory.
1162                  */
1163                 frag.insn[n++] =
1164                         jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1165                         jit_x1_fnop();
1166                 frag.insn[n++] =
1167                         jit_x0_addi(clob3, TREG_SP, 16)  |
1168                         jit_x1_st_add(TREG_SP, clob3, 8);
1169 #ifdef __LITTLE_ENDIAN
1170                 frag.insn[n++] =
1171                         jit_x0_addi(clob1, ra, 0)   |
1172                         jit_x1_st_add(TREG_SP, clob1, 8);
1173 #else
1174                 frag.insn[n++] =
1175                         jit_x0_addi(clob1, ra, load_store_size - 1)   |
1176                         jit_x1_st_add(TREG_SP, clob1, 8);
1177 #endif
1178                 if (load_store_size == 8) {
1179                         /*
1180                          * We store one byte at a time, not for speed but for
1181                          * compact code. After each store the data source register
1182                          * is rotated right by one byte, so it is unchanged after 8 stores.
1183                          */
1184                         frag.insn[n++] =
1185                                 jit_x0_addi(clob2, TREG_ZERO, 7)     |
1186                                 jit_x1_st_add(TREG_SP, clob2, 16);
1187                         frag.insn[n++] =
1188                                 jit_x0_rotli(rb, rb, 56)      |
1189                                 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1190                         frag.insn[n++] =
1191                                 jit_x0_addi(clob2, clob2, -1) |
1192                                 jit_x1_bnezt(clob2, -1);
1193                         frag.insn[n++] =
1194                                 jit_x0_fnop()                 |
1195                                 jit_x1_addi(clob2, y1_br_reg, 0);
1196                 } else if (load_store_size == 4) {
1197                         frag.insn[n++] =
1198                                 jit_x0_addi(clob2, TREG_ZERO, 3)     |
1199                                 jit_x1_st_add(TREG_SP, clob2, 16);
1200                         frag.insn[n++] =
1201                                 jit_x0_rotli(rb, rb, 56)      |
1202                                 jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1203                         frag.insn[n++] =
1204                                 jit_x0_addi(clob2, clob2, -1) |
1205                                 jit_x1_bnezt(clob2, -1);
1206                         /*
1207                          * Same as the 8-byte case, but we need to rotate another
1208                          * 4 bytes to recover rb for the 4-byte store.
1209                          */
1210                         frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1211                                 jit_x1_addi(clob2, y1_br_reg, 0);
1212                 } else { /* =2 */
1213                         frag.insn[n++] =
1214                                 jit_x0_addi(clob2, rb, 0)     |
1215                                 jit_x1_st_add(TREG_SP, clob2, 16);
1216                         for (k = 0; k < 2; k++) {
1217                                 frag.insn[n++] =
1218                                         jit_x0_shrui(rb, rb, 8)  |
1219                                         jit_x1_st1_add(clob1, rb,
1220                                                        UA_FIXUP_ADDR_DELTA);
1221                         }
1222                         frag.insn[n++] =
1223                                 jit_x0_addi(rb, clob2, 0)       |
1224                                 jit_x1_addi(clob2, y1_br_reg, 0);
1225                 }
1226
1227                 if (bundle_2_enable)
1228                         frag.insn[n++] = bundle_2;
1229
1230                 if (y1_lr) {
1231                         frag.insn[n++] =
1232                                 jit_x0_fnop()                    |
1233                                 jit_x1_mfspr(y1_lr_reg,
1234                                              SPR_EX_CONTEXT_0_0);
1235                 }
1236                 if (y1_br) {
1237                         frag.insn[n++] =
1238                                 jit_x0_fnop()                    |
1239                                 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1240                                              clob2);
1241                 }
1242                 if (x1_add) {
1243                         frag.insn[n++] =
1244                                 jit_x0_addi(ra, ra, x1_add_imm8) |
1245                                 jit_x1_ld_add(clob2, clob3, -8);
1246                 } else {
1247                         frag.insn[n++] =
1248                                 jit_x0_fnop()                    |
1249                                 jit_x1_ld_add(clob2, clob3, -8);
1250                 }
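                     /* Reload the remaining clobbers and iret back to the user's program. */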
1251                 frag.insn[n++] =
1252                         jit_x0_fnop()   |
1253                         jit_x1_ld_add(clob1, clob3, -8);
1254                 frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1255                 frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1256
1257         } else {
1258                 /*
1259                  * Generic memory load cases.
1260                  *
1261                  * Allocate space for saving clob1, 2 and 3 on the user's
1262                  * stack. Register clob3 points to where clob1 is saved,
1263                  * followed by clob2 and 3 from high to low memory.
1264                  */
1265
1266                 frag.insn[n++] =
1267                         jit_x0_addi(TREG_SP, TREG_SP, -32) |
1268                         jit_x1_fnop();
1269                 frag.insn[n++] =
1270                         jit_x0_addi(clob3, TREG_SP, 16) |
1271                         jit_x1_st_add(TREG_SP, clob3, 8);
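                     /*
                      * Save the old clob2 while copying the access address ra
                      * into it; the load sequence below uses clob2 as its
                      * address register.
                      */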
1272                 frag.insn[n++] =
1273                         jit_x0_addi(clob2, ra, 0) |
1274                         jit_x1_st_add(TREG_SP, clob2, 8);
1275
1276                 if (y1_br) {
1277                         frag.insn[n++] =
1278                                 jit_x0_addi(clob1, y1_br_reg, 0) |
1279                                 jit_x1_st_add(TREG_SP, clob1, 16);
1280                 } else {
1281                         frag.insn[n++] =
1282                                 jit_x0_fnop() |
1283                                 jit_x1_st_add(TREG_SP, clob1, 16);
1284                 }
1285
1286                 if (bundle_2_enable)
1287                         frag.insn[n++] = bundle_2;
1288
1289                 if (y1_lr) {
1290                         frag.insn[n++] =
1291                                 jit_x0_fnop()  |
1292                                 jit_x1_mfspr(y1_lr_reg,
1293                                              SPR_EX_CONTEXT_0_0);
1294                 }
1295
1296                 if (y1_br) {
1297                         frag.insn[n++] =
1298                                 jit_x0_fnop() |
1299                                 jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1300                                              clob1);
1301                 }
1302
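                     /*
                      * The unaligned load itself: ldna fetches the aligned
                      * doubleword containing the start of the access, a
                      * second ldna (via clob1 = address + 7) fetches the one
                      * containing its end, and dblalign merges the two into
                      * rd based on the low bits of the address in clob2.
                      */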
1303                 frag.insn[n++] =
1304                         jit_x0_addi(clob1, clob2, 7)      |
1305                         jit_x1_ldna(rd, clob2);
1306                 frag.insn[n++] =
1307                         jit_x0_fnop()                     |
1308                         jit_x1_ldna(clob1, clob1);
1309                 frag.insn[n++] =
1310                         jit_x0_dblalign(rd, clob1, clob2) |
1311                         jit_x1_ld_add(clob1, clob3, -8);
1312                 if (x1_add) {
1313                         frag.insn[n++] =
1314                                 jit_x0_addi(ra, ra, x1_add_imm8) |
1315                                 jit_x1_ld_add(clob2, clob3, -8);
1316                 } else {
1317                         frag.insn[n++] =
1318                                 jit_x0_fnop()  |
1319                                 jit_x1_ld_add(clob2, clob3, -8);
1320                 }
1321
1322                 frag.insn[n++] =
1323                         jit_x0_fnop() |
1324                         jit_x1_ld(clob3, clob3);
1325
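                     /*
                      * For 4- and 2-byte loads, extract the loaded value and
                      * sign- or zero-extend it to the full 64-bit register.
                      */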
1326                 if (load_store_size == 4) {
1327                         if (load_store_signed)
1328                                 frag.insn[n++] =
1329                                         jit_x0_bfexts(
1330                                                 rd, rd,
1331                                                 UA_FIXUP_BFEXT_START(4),
1332                                                 UA_FIXUP_BFEXT_END(4)) |
1333                                         jit_x1_fnop();
1334                         else
1335                                 frag.insn[n++] =
1336                                         jit_x0_bfextu(
1337                                                 rd, rd,
1338                                                 UA_FIXUP_BFEXT_START(4),
1339                                                 UA_FIXUP_BFEXT_END(4)) |
1340                                         jit_x1_fnop();
1341                 } else if (load_store_size == 2) {
1342                         if (load_store_signed)
1343                                 frag.insn[n++] =
1344                                         jit_x0_bfexts(
1345                                                 rd, rd,
1346                                                 UA_FIXUP_BFEXT_START(2),
1347                                                 UA_FIXUP_BFEXT_END(2)) |
1348                                         jit_x1_fnop();
1349                         else
1350                                 frag.insn[n++] =
1351                                         jit_x0_bfextu(
1352                                                 rd, rd,
1353                                                 UA_FIXUP_BFEXT_START(2),
1354                                                 UA_FIXUP_BFEXT_END(2)) |
1355                                         jit_x1_fnop();
1356                 }
1357
1358                 frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1359         }
1360
1361         /* Max JIT bundle count is 14. */
1362         WARN_ON(n > 14);
1363
1364         if (!unexpected) {
1365                 int status = 0;
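                     /*
                      * Hash the faulting pc (a bundle index, hence pc >> 3)
                      * into one of the PAGE_SIZE >> UNALIGN_JIT_SHIFT
                      * fragment slots in the per-process JIT page.
                      */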
1366                 int idx = (regs->pc >> 3) &
1367                         ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1368
1369                 frag.pc = regs->pc;
1370                 frag.bundle = bundle;
1371
1372                 if (unaligned_printk) {
1373                         pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1374                                 current->comm, current->pid,
1375                                 (unsigned long)frag.pc,
1376                                 (unsigned long)frag.bundle,
1377                                 (int)alias, (int)rd, (int)ra,
1378                                 (int)rb, (int)bundle_2_enable,
1379                                 (int)y1_lr, (int)y1_br, (int)x1_add);
1380
1381                         for (k = 0; k < n; k += 2)
1382                                 pr_info("[%d] %016llx %016llx\n",
1383                                         k, (unsigned long long)frag.insn[k],
1384                                         (unsigned long long)frag.insn[k+1]);
1385                 }
1386
1387                 /* Swap bundle byte order on big-endian systems. */
1388 #ifdef __BIG_ENDIAN
1389                 frag.bundle = GX_INSN_BSWAP(frag.bundle);
1390                 for (k = 0; k < n; k++)
1391                         frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1392 #endif /* __BIG_ENDIAN */
1393
1394                 status = copy_to_user((void __user *)&jit_code_area[idx],
1395                                       &frag, sizeof(frag));
1396                 if (status) {
1397                         /* Failed to copy the JIT into userland; send SIGSEGV. */
1398                         siginfo_t info = {
1399                                 .si_signo = SIGSEGV,
1400                                 .si_code = SEGV_MAPERR,
1401                                 .si_addr = (void __user *)&jit_code_area[idx]
1402                         };
1403
1404                         pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1405                                 current->pid, current->comm,
1406                                 (unsigned long long)&jit_code_area[idx]);
1407
1408                         trace_unhandled_signal("segfault in unalign fixup",
1409                                                regs,
1410                                                (unsigned long)info.si_addr,
1411                                                SIGSEGV);
1412                         force_sig_info(info.si_signo, &info, current);
1413                         return;
1414                 }
1415
1416
1417                 /* Do a cheap, non-atomic increment; the count need not be exact. */
1418                 unaligned_fixup_count++;
1419                 __flush_icache_range((unsigned long)&jit_code_area[idx],
1420                                      (unsigned long)&jit_code_area[idx] +
1421                                      sizeof(frag));
1422
1423                 /* Set up SPR_EX_CONTEXT_0_0/1 so the JIT's final iret returns to the user program. */
1424                 __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1425                 __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1426
1427                 /* Point regs->pc at the start of the new JIT fragment. */
1428                 regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1429                 /* Set ICS in SPR_EX_CONTEXT_K_1. */
1430                 regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1431         }
1432 }
1433
1434
1435 /*
1436  * C function to generate the unalign data JIT. Called from the unalign
1437  * data interrupt handler.
1438  *
1439  * First check whether unalign fixup is disabled, the exception did not
1440  * come from user space, or the sp register points to an unaligned
1441  * address; if so, generate a SIGBUS. Then map a page into user space as
1442  * the JIT area if it is not mapped yet. Generate the JIT code by calling
1443  * jit_bundle_gen(), then return to the exception handler.
1444  *
1445  * The exception handler will "iret" to the newly generated JIT code after
1446  * restoring the caller-saved registers. The JIT code will then perform
1447  * another "iret" to resume the user's program.
1448  */
1449
1450 void do_unaligned(struct pt_regs *regs, int vecnum)
1451 {
1452         enum ctx_state prev_state = exception_enter();
1453         tilegx_bundle_bits __user  *pc;
1454         tilegx_bundle_bits bundle;
1455         struct thread_info *info = current_thread_info();
1456         int align_ctl;
1457
1458         /* Check the per-process unaligned fixup control (set via prctl). */
1459         align_ctl = unaligned_fixup;
1460         switch (task_thread_info(current)->align_ctl) {
1461         case PR_UNALIGN_NOPRINT:
1462                 align_ctl = 1;
1463                 break;
1464         case PR_UNALIGN_SIGBUS:
1465                 align_ctl = 0;
1466                 break;
1467         }
1468
1469         /* Enable interrupts in order to access userland. */
1470         local_irq_enable();
1471
1472         /*
1473          * If the fault came from kernel space, there are two choices:
1474          * (a) unaligned_fixup < 1: first try the get_user/put_user
1475          *     exception fixup, which returns -EFAULT; if there is no
1476          *     fixup, simply panic the kernel.
1477          * (b) unaligned_fixup >= 1: try to fix the unaligned access if
1478          *     triggered by get_user/put_user(); panic if not fixable.
1479          */
1480
1481         if (EX1_PL(regs->ex1) != USER_PL) {
1482
1483                 if (align_ctl < 1) {
1484                         unaligned_fixup_count++;
1485                         /* The exception came from the kernel; try to fix it up. */
1486                         if (fixup_exception(regs)) {
1487                                 if (unaligned_printk)
1488                                         pr_info("Unalign fixup: %d %llx @%llx\n",
1489                                                 (int)unaligned_fixup,
1490                                                 (unsigned long long)regs->ex1,
1491                                                 (unsigned long long)regs->pc);
1492                         } else {
1493                                 /* Not fixable. Go panic. */
1494                                 panic("Unalign exception in Kernel. pc=%lx",
1495                                       regs->pc);
1496                         }
1497                 } else {
1498                         /*
1499                          * Try to fix the exception. If we can't, panic the
1500                          * kernel.
1501                          */
1502                         bundle = GX_INSN_BSWAP(
1503                                 *((tilegx_bundle_bits *)(regs->pc)));
1504                         jit_bundle_gen(regs, bundle, align_ctl);
1505                 }
1506                 goto done;
1507         }
1508
1509         /*
1510          * If the fault came from userland with ICS set, the stack is not
1511          * 8-byte aligned, or unaligned fixup is disabled, trigger SIGBUS.
1512          */
1513         if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1514                 siginfo_t info = {
1515                         .si_signo = SIGBUS,
1516                         .si_code = BUS_ADRALN,
1517                         .si_addr = (unsigned char __user *)0
1518                 };
1519
1520                 if (unaligned_printk)
1521                         pr_info("Unalign fixup: %d %llx @%llx\n",
1522                                 (int)unaligned_fixup,
1523                                 (unsigned long long)regs->ex1,
1524                                 (unsigned long long)regs->pc);
1525
1526                 unaligned_fixup_count++;
1527
1528                 trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1529                 force_sig_info(info.si_signo, &info, current);
1530                 goto done;
1531         }
1532
1533
1534         /* Read the bundle that caused the exception. */
1535         pc = (tilegx_bundle_bits __user *)(regs->pc);
1536         if (get_user(bundle, pc) != 0) {
1537                 /* We will probably never get here, since pc is a valid user address. */
1538                 siginfo_t info = {
1539                         .si_signo = SIGSEGV,
1540                         .si_code = SEGV_MAPERR,
1541                         .si_addr = (void __user *)pc
1542                 };
1543                 pr_err("Couldn't read instruction at %p trying to step\n", pc);
1544                 trace_unhandled_signal("segfault in unalign fixup", regs,
1545                                        (unsigned long)info.si_addr, SIGSEGV);
1546                 force_sig_info(info.si_signo, &info, current);
1547                 goto done;
1548         }
1549
1550         if (!info->unalign_jit_base) {
1551                 void __user *user_page;
1552
1553                 /*
1554                  * Allocate a page in userland.
1555                  * For 64-bit processes we try to place the mapping far
1556                  * from anything else that might be going on (specifically
1557                  * 64 GB below the top of the user address space).  If it
1558                  * happens not to be possible to put it there, it's OK;
1559                  * the kernel will choose another location and we'll
1560                  * remember it for later.
1561                  */
1562                 if (is_compat_task())
1563                         user_page = NULL;
1564                 else
1565                         user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1566                                 (current->pid << PAGE_SHIFT);
1567
1568                 user_page = (void __user *) vm_mmap(NULL,
1569                                                     (unsigned long)user_page,
1570                                                     PAGE_SIZE,
1571                                                     PROT_EXEC | PROT_READ |
1572                                                     PROT_WRITE,
1573 #ifdef CONFIG_HOMECACHE
1574                                                     MAP_CACHE_HOME_TASK |
1575 #endif
1576                                                     MAP_PRIVATE |
1577                                                     MAP_ANONYMOUS,
1578                                                     0);
1579
1580                 if (IS_ERR((void __force *)user_page)) {
1581                         pr_err("Out of kernel pages trying do_mmap\n");
1582                         goto done;
1583                 }
1584
1585                 /* Save the address in the thread_info struct */
1586                 info->unalign_jit_base = user_page;
1587                 if (unaligned_printk)
1588                         pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1589                                 raw_smp_processor_id(), current->pid,
1590                                 (unsigned long long)user_page);
1591         }
1592
1593         /* Generate unalign JIT */
1594         jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1595
1596 done:
1597         exception_exit(prev_state);
1598 }
1599
1600 #endif /* __tilegx__ */