/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

#define AES_ENTRY(func)         ENTRY(neon_ ## func)
#define AES_ENDPROC(func)       ENDPROC(neon_ ## func)

        /* multiply by polynomial 'x' in GF(2^8) */
        .macro          mul_by_x, out, in, temp, const
        sshr            \temp, \in, #7
        add             \out, \in, \in
        and             \temp, \temp, \const
        eor             \out, \out, \temp
        .endm
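
        /*
         * mul_by_x implements doubling in GF(2^8) with the AES reduction
         * polynomial x^8 + x^4 + x^3 + x + 1: shift each byte left by one
         * bit and xor in 0x1b if the bit shifted out was set. The sshr by
         * #7 turns each byte's top bit into an all-ones/all-zeroes mask,
         * which then selects the 0x1b constant passed in \const.
         */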

        /* preload the entire Sbox */
        .macro          prepare, sbox, shiftrows, temp
        adr             \temp, \sbox
        movi            v12.16b, #0x40
        ldr             q13, \shiftrows
        movi            v14.16b, #0x1b
        ld1             {v16.16b-v19.16b}, [\temp], #64
        ld1             {v20.16b-v23.16b}, [\temp], #64
        ld1             {v24.16b-v27.16b}, [\temp], #64
        ld1             {v28.16b-v31.16b}, [\temp]
        .endm
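
        /*
         * After prepare, the fixed register allocation is:
         *   v16-v31: the 256 byte (Inv)Sbox
         *   v13:     the (Inv)ShiftRows permutation, as tbl indexes
         *   v12:     0x40 in each lane, the 64 byte window stride used
         *            by sub_bytes to step through the Sbox registers
         *   v14:     0x1b in each lane, the GF(2^8) reduction constant
         * These must stay live across the block macros below.
         */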

        /* do preload for encryption */
        .macro          enc_prepare, ignore0, ignore1, temp
        prepare         .LForward_Sbox, .LForward_ShiftRows, \temp
        .endm

        .macro          enc_switch_key, ignore0, ignore1, temp
        /* do nothing */
        .endm

        /* do preload for decryption */
        .macro          dec_prepare, ignore0, ignore1, temp
        prepare         .LReverse_Sbox, .LReverse_ShiftRows, \temp
        .endm

        /* apply SubBytes transformation using the preloaded Sbox */
        .macro          sub_bytes, in
        sub             v9.16b, \in\().16b, v12.16b
        tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b
        sub             v10.16b, v9.16b, v12.16b
        tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v11.16b, v10.16b, v12.16b
        tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b
        tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
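
        /*
         * sub_bytes looks up all 16 state bytes in the 256 byte Sbox held
         * in v16-v31. A 4-register tbl translates indexes 0-63 and writes
         * zero for anything larger; each subsequent tbx leaves out-of-range
         * lanes untouched instead, so repeatedly subtracting 0x40 (v12)
         * slides a 64 byte lookup window across the table until every
         * byte has been substituted.
         */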

        /* apply MixColumns transformation */
        .macro          mix_columns, in
        mul_by_x        v10.16b, \in\().16b, v9.16b, v14.16b
        rev32           v8.8h, \in\().8h
        eor             \in\().16b, v10.16b, \in\().16b
        shl             v9.4s, v8.4s, #24
        shl             v11.4s, \in\().4s, #24
        sri             v9.4s, v8.4s, #8
        sri             v11.4s, \in\().4s, #8
        eor             v9.16b, v9.16b, v8.16b
        eor             v10.16b, v10.16b, v9.16b
        eor             \in\().16b, v10.16b, v11.16b
        .endm
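
        /*
         * MixColumns multiplies each 32-bit column by the circulant
         * polynomial {02,01,01,03} over GF(2^8). mul_by_x supplies the
         * doubled bytes, rev32 on .8h lanes rotates each column by two
         * bytes, and each shl #24/sri #8 pair is a rotate right by 8 bits
         * within a column; xor'ing the rotated terms forms the product.
         */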

        /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
        .macro          inv_mix_columns, in
        mul_by_x        v11.16b, \in\().16b, v10.16b, v14.16b
        mul_by_x        v11.16b, v11.16b, v10.16b, v14.16b
        eor             \in\().16b, \in\().16b, v11.16b
        rev32           v11.8h, v11.8h
        eor             \in\().16b, \in\().16b, v11.16b
        mix_columns     \in
        .endm
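
        /*
         * This works because {0e,09,0d,0b} (InvMixColumns) equals
         * {02,01,01,03} x {05,00,04,00} modulo x^4 + 1: the two mul_by_x
         * passes give 4.in, the first eor folds that in to make 5.in, and
         * rev32 rotates 4.in by two bytes to supply the x^2 term, after
         * which the regular mix_columns completes the inverse transform.
         */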

        .macro          do_block, enc, in, rounds, rk, rkp, i
        ld1             {v15.16b}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
        sub_bytes       \in
        ld1             {v15.16b}, [\rkp], #16
        subs            \i, \i, #1
        beq             2222f
        .if             \enc == 1
        mix_columns     \in
        .else
        inv_mix_columns \in
        .endif
        b               1111b
2222:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        .endm
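
        /*
         * Each loop iteration of do_block is one AES round: add the round
         * key, apply (Inv)ShiftRows via tbl with the permutation in v13,
         * then (Inv)SubBytes (ShiftRows and SubBytes commute, as the
         * latter operates on independent bytes). All but the final
         * iteration also run (inv_)mix_columns; the loop consumes
         * \rounds + 1 round keys in total, ending with the final key
         * addition at 2222.
         */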

        .macro          encrypt_block, in, rounds, rk, rkp, i
        do_block        1, \in, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block, in, rounds, rk, rkp, i
        do_block        0, \in, \rounds, \rk, \rkp, \i
        .endm

        /*
         * Interleaved versions: functionally equivalent to the
         * ones above, but applied to 2 or 4 AES states in parallel.
         */

        .macro          sub_bytes_2x, in0, in1
        sub             v8.16b, \in0\().16b, v12.16b
        sub             v9.16b, \in1\().16b, v12.16b
        tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
        tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
        sub             v10.16b, v8.16b, v12.16b
        sub             v11.16b, v9.16b, v12.16b
        tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
        tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v8.16b, v10.16b, v12.16b
        sub             v9.16b, v11.16b, v12.16b
        tbx             \in0\().16b, {v24.16b-v27.16b}, v10.16b
        tbx             \in1\().16b, {v24.16b-v27.16b}, v11.16b
        tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
        tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
        .endm

        .macro          sub_bytes_4x, in0, in1, in2, in3
        sub             v8.16b, \in0\().16b, v12.16b
        tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
        sub             v9.16b, \in1\().16b, v12.16b
        tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
        sub             v10.16b, \in2\().16b, v12.16b
        tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
        sub             v11.16b, \in3\().16b, v12.16b
        tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
        tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
        tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
        sub             v8.16b, v8.16b, v12.16b
        tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
        sub             v9.16b, v9.16b, v12.16b
        tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
        sub             v10.16b, v10.16b, v12.16b
        tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b
        sub             v11.16b, v11.16b, v12.16b
        tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
        sub             v8.16b, v8.16b, v12.16b
        tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
        sub             v9.16b, v9.16b, v12.16b
        tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
        sub             v10.16b, v10.16b, v12.16b
        tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
        sub             v11.16b, v11.16b, v12.16b
        tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
        tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
        tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
        .endm
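
        /*
         * The 2x and 4x variants interleave the sub/tbl/tbx chains of
         * independent AES states, so the multi-cycle table lookups of one
         * block can overlap with the index arithmetic of another, which
         * helps hide the lookup latency.
         */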

        .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
        sshr            \tmp0\().16b, \in0\().16b,  #7
        add             \out0\().16b, \in0\().16b,  \in0\().16b
        sshr            \tmp1\().16b, \in1\().16b,  #7
        and             \tmp0\().16b, \tmp0\().16b, \const\().16b
        add             \out1\().16b, \in1\().16b,  \in1\().16b
        and             \tmp1\().16b, \tmp1\().16b, \const\().16b
        eor             \out0\().16b, \out0\().16b, \tmp0\().16b
        eor             \out1\().16b, \out1\().16b, \tmp1\().16b
        .endm

        .macro          mix_columns_2x, in0, in1
        mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v14
        rev32           v10.8h, \in0\().8h
        rev32           v11.8h, \in1\().8h
        eor             \in0\().16b, v8.16b, \in0\().16b
        eor             \in1\().16b, v9.16b, \in1\().16b
        shl             v12.4s, v10.4s, #24
        shl             v13.4s, v11.4s, #24
        eor             v8.16b, v8.16b, v10.16b
        sri             v12.4s, v10.4s, #8
        shl             v10.4s, \in0\().4s, #24
        eor             v9.16b, v9.16b, v11.16b
        sri             v13.4s, v11.4s, #8
        shl             v11.4s, \in1\().4s, #24
        sri             v10.4s, \in0\().4s, #8
        eor             \in0\().16b, v8.16b, v12.16b
        sri             v11.4s, \in1\().4s, #8
        eor             \in1\().16b, v9.16b, v13.16b
        eor             \in0\().16b, v10.16b, \in0\().16b
        eor             \in1\().16b, v11.16b, \in1\().16b
        .endm
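
        /*
         * Note that mix_columns_2x (and the inv_mix_cols variants below)
         * use v12 and v13 as scratch, clobbering the 0x40 constant and
         * the ShiftRows permutation; the interleaved round loops below
         * therefore reload both registers on every iteration.
         */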

        .macro          inv_mix_cols_2x, in0, in1
        mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v14
        mul_by_x_2x     v8, v9, v8, v9, v10, v11, v14
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        rev32           v8.8h, v8.8h
        rev32           v9.8h, v9.8h
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        mix_columns_2x  \in0, \in1
        .endm

        .macro          inv_mix_cols_4x, in0, in1, in2, in3
        mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v14
        mul_by_x_2x     v10, v11, \in2, \in3, v12, v13, v14
        mul_by_x_2x     v8, v9, v8, v9, v12, v13, v14
        mul_by_x_2x     v10, v11, v10, v11, v12, v13, v14
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        eor             \in2\().16b, \in2\().16b, v10.16b
        eor             \in3\().16b, \in3\().16b, v11.16b
        rev32           v8.8h, v8.8h
        rev32           v9.8h, v9.8h
        rev32           v10.8h, v10.8h
        rev32           v11.8h, v11.8h
        eor             \in0\().16b, \in0\().16b, v8.16b
        eor             \in1\().16b, \in1\().16b, v9.16b
        eor             \in2\().16b, \in2\().16b, v10.16b
        eor             \in3\().16b, \in3\().16b, v11.16b
        mix_columns_2x  \in0, \in1
        mix_columns_2x  \in2, \in3
        .endm

        .macro          do_block_2x, enc, in0, in1, rounds, rk, rkp, i
        ld1             {v15.16b}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        sub_bytes_2x    \in0, \in1
        tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
        ld1             {v15.16b}, [\rkp], #16
        subs            \i, \i, #1
        beq             2222f
        .if             \enc == 1
        mix_columns_2x  \in0, \in1
        ldr             q13, .LForward_ShiftRows
        .else
        inv_mix_cols_2x \in0, \in1
        ldr             q13, .LReverse_ShiftRows
        .endif
        movi            v12.16b, #0x40
        b               1111b
2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        .endm

        .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
        ld1             {v15.16b}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        sub_bytes_4x    \in0, \in1, \in2, \in3
        tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
        tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
        tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
        ld1             {v15.16b}, [\rkp], #16
        subs            \i, \i, #1
        beq             2222f
        .if             \enc == 1
        mix_columns_2x  \in0, \in1
        mix_columns_2x  \in2, \in3
        ldr             q13, .LForward_ShiftRows
        .else
        inv_mix_cols_4x \in0, \in1, \in2, \in3
        ldr             q13, .LReverse_ShiftRows
        .endif
        movi            v12.16b, #0x40
        b               1111b
2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        .endm

        .macro          encrypt_block2x, in0, in1, rounds, rk, rkp, i
        do_block_2x     1, \in0, \in1, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block2x, in0, in1, rounds, rk, rkp, i
        do_block_2x     0, \in0, \in1, \rounds, \rk, \rkp, \i
        .endm

        .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm

        .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
        do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
        .endm

#include "aes-modes.S"
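
        /*
         * aes-modes.S provides the actual block mode (ECB/CBC/CTR/XTS)
         * entry points: it expands the (encrypt|decrypt)_block* macros
         * defined above and uses AES_ENTRY/AES_ENDPROC to emit the
         * resulting functions under the neon_ prefix.
         */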

        .text
        .align          4
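
        /*
         * tbl permutation vectors: with the state bytes in AES
         * column-major order, moving input byte index[i] to output byte i
         * implements ShiftRows (row n rotated left by n) and its inverse,
         * respectively.
         */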
.LForward_ShiftRows:
        .byte           0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
        .byte           0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb

.LReverse_ShiftRows:
        .byte           0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
        .byte           0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3

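        /*
         * The standard FIPS-197 AES S-box and inverse S-box, 256 bytes
         * each; prepare loads the selected table into v16-v31.
         */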
.LForward_Sbox:
        .byte           0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
        .byte           0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
        .byte           0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
        .byte           0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
        .byte           0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
        .byte           0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
        .byte           0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
        .byte           0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
        .byte           0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
        .byte           0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
        .byte           0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
        .byte           0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
        .byte           0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
        .byte           0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
        .byte           0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
        .byte           0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
        .byte           0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
        .byte           0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
        .byte           0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
        .byte           0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
        .byte           0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
        .byte           0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
        .byte           0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
        .byte           0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
        .byte           0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
        .byte           0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
        .byte           0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
        .byte           0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
        .byte           0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
        .byte           0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
        .byte           0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
        .byte           0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.LReverse_Sbox:
        .byte           0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
        .byte           0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
        .byte           0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
        .byte           0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
        .byte           0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
        .byte           0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
        .byte           0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
        .byte           0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
        .byte           0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
        .byte           0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
        .byte           0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
        .byte           0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
        .byte           0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
        .byte           0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
        .byte           0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
        .byte           0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
        .byte           0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
        .byte           0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
        .byte           0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
        .byte           0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
        .byte           0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
        .byte           0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
        .byte           0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
        .byte           0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
        .byte           0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
        .byte           0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
        .byte           0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
        .byte           0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
        .byte           0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
        .byte           0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
        .byte           0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
        .byte           0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d