Upgrade to 4.4.50-rt62
[kvmfornfv.git] / kernel / arch / arm64 / crypto / aes-neon.S
1 /*
2  * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
3  *
4  * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
13
        /*
         * The generic mode entry points in aes-modes.S are emitted with a
         * "neon_" symbol prefix so this pure-NEON implementation can coexist
         * with other AES drivers that include the same file.
         */
14 #define AES_ENTRY(func)         ENTRY(neon_ ## func)
15 #define AES_ENDPROC(func)       ENDPROC(neon_ ## func)
16
17         /* multiply by polynomial 'x' in GF(2^8), i.e., doubling with
         * reduction: \out = (\in << 1) ^ (0x1b where the top bit of \in
         * was set).  \const must hold the reduction byte (0x1b) in every
         * lane; \temp is a scratch vreg.  Operands carry their own
         * arrangement specifiers (callers pass .16b forms). */
18         .macro          mul_by_x, out, in, temp, const
19         sshr            \temp, \in, #7                  /* temp = 0xff in lanes with MSB set */
20         add             \out, \in, \in                  /* out = in << 1, per lane */
21         and             \temp, \temp, \const            /* keep 0x1b only where MSB was set */
22         eor             \out, \out, \temp               /* conditional reduction */
23         .endm
24
25         /* preload the entire Sbox into v16-v31 and set up the constants:
         *   v12 = 0x40 in every lane (index re-bias for the tbx quarters)
         *   v13 = ShiftRows permutation vector (loaded from \shiftrows)
         *   v14 = 0x1b in every lane (GF(2^8) reduction constant)
         * \temp is a scratch GP register used as the table pointer. */
26         .macro          prepare, sbox, shiftrows, temp
27         adr             \temp, \sbox
28         movi            v12.16b, #0x40
29         ldr             q13, \shiftrows
30         movi            v14.16b, #0x1b
31         ld1             {v16.16b-v19.16b}, [\temp], #64         /* Sbox bytes 0..63 */
32         ld1             {v20.16b-v23.16b}, [\temp], #64         /* Sbox bytes 64..127 */
33         ld1             {v24.16b-v27.16b}, [\temp], #64         /* Sbox bytes 128..191 */
34         ld1             {v28.16b-v31.16b}, [\temp]              /* Sbox bytes 192..255 */
35         .endm
36
37         /* do preload for encryption; \ignore0/\ignore1 exist only so the
         * argument layout matches what the aes-modes.S callers pass */
38         .macro          enc_prepare, ignore0, ignore1, temp
39         prepare         .LForward_Sbox, .LForward_ShiftRows, \temp
40         .endm
41
42         .macro          enc_switch_key, ignore0, ignore1, temp
43         /* do nothing - the do_block* macros (re)load the round keys from
         * memory on every invocation, so there is no cached per-key state
         * that needs updating when the key changes */
44         .endm
45
46         /* do preload for decryption (reverse Sbox / reverse ShiftRows) */
47         .macro          dec_prepare, ignore0, ignore1, temp
48         prepare         .LReverse_Sbox, .LReverse_ShiftRows, \temp
49         .endm
50
51         /* apply SubBytes transformation using the preloaded Sbox: the
         * 256-byte table spans v16-v31, but tbl/tbx can only index 64
         * bytes at a time, so each 64-entry quarter is looked up in turn
         * with the indices re-biased by 0x40 (v12) at every step.  tbx
         * leaves lanes whose (rebased) index is out of range untouched,
         * so the four lookups merge into the final result.
         * Clobbers v9-v11. */
52         .macro          sub_bytes, in
53         sub             v9.16b, \in\().16b, v12.16b             /* indices for quarter 2 */
54         tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b
55         sub             v10.16b, v9.16b, v12.16b                /* indices for quarter 3 */
56         tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b
57         sub             v11.16b, v10.16b, v12.16b               /* indices for quarter 4 */
58         tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b
59         tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b
60         .endm
61
62         /* apply MixColumns transformation: combines x, 2.x (mul_by_x)
         * and 3.x (= 2.x ^ x) with 16-bit rotations (rev32 on .8h) and
         * 8-bit rotations (shl #24 + sri #8) of each 32-bit column.
         * Clobbers v8-v11; v14 must hold the 0x1b reduction constant. */
63         .macro          mix_columns, in
64         mul_by_x        v10.16b, \in\().16b, v9.16b, v14.16b    /* v10 = 2.x */
65         rev32           v8.8h, \in\().8h                        /* v8 = x ror32 #16 */
66         eor             \in\().16b, v10.16b, \in\().16b         /* in = 3.x */
67         shl             v9.4s, v8.4s, #24
68         shl             v11.4s, \in\().4s, #24
69         sri             v9.4s, v8.4s, #8                        /* v9 = v8 ror32 #8 */
70         sri             v11.4s, \in\().4s, #8                   /* v11 = 3.x ror32 #8 */
71         eor             v9.16b, v9.16b, v8.16b
72         eor             v10.16b, v10.16b, v9.16b
73         eor             \in\().16b, v10.16b, v11.16b
74         .endm
75
76         /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 }, then apply
         * the forward MixColumns.  4.x is obtained by doubling twice;
         * rev32 rotates each 32-bit column by 16 bits.  Clobbers v8-v11
         * (v10/v11 directly, v8-v11 via mix_columns). */
77         .macro          inv_mix_columns, in
78         mul_by_x        v11.16b, \in\().16b, v10.16b, v14.16b   /* v11 = 2.x */
79         mul_by_x        v11.16b, v11.16b, v10.16b, v14.16b      /* v11 = 4.x */
80         eor             \in\().16b, \in\().16b, v11.16b         /* in = 5.x */
81         rev32           v11.8h, v11.8h                          /* v11 = 4.x ror32 #16 */
82         eor             \in\().16b, \in\().16b, v11.16b
83         mix_columns     \in
84         .endm
85
        /*
         * Run one AES state through all rounds.
         *   \enc    - assemble-time constant: 1 = encrypt, 0 = decrypt
         *   \in     - vreg holding the AES state, transformed in place
         *   \rounds - number of rounds
         *   \rk     - pointer to the expanded key schedule
         *   \rkp/\i - scratch GP regs (round key pointer / round counter)
         * The current round key is staged in v15.  The final round (label
         * 2222) applies only AddRoundKey, skipping (Inv)MixColumns.
         */
86         .macro          do_block, enc, in, rounds, rk, rkp, i
87         ld1             {v15.4s}, [\rk]                         /* first round key */
88         add             \rkp, \rk, #16
89         mov             \i, \rounds
90 1111:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
91         tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
92         sub_bytes       \in
93         ld1             {v15.4s}, [\rkp], #16                   /* next round key */
94         subs            \i, \i, #1
95         beq             2222f                                   /* final round: no MixColumns */
96         .if             \enc == 1
97         mix_columns     \in
98         .else
99         inv_mix_columns \in
100         .endif
101         b               1111b
102 2222:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
103         .endm
104
        /* encrypt one AES state in \in with the key schedule at \rk */
105         .macro          encrypt_block, in, rounds, rk, rkp, i
106         do_block        1, \in, \rounds, \rk, \rkp, \i
107         .endm
108
        /* decrypt one AES state in \in with the key schedule at \rk */
109         .macro          decrypt_block, in, rounds, rk, rkp, i
110         do_block        0, \in, \rounds, \rk, \rkp, \i
111         .endm
112
113         /*
114          * Interleaved versions: functionally equivalent to the
115          * ones above, but applied to 2 or 4 AES states in parallel.
116          */
117
        /* two-state interleaved version of sub_bytes: same quartered
         * tbl/tbx lookup, with the two states' instructions interleaved
         * to hide latency.  Clobbers v8-v11. */
118         .macro          sub_bytes_2x, in0, in1
119         sub             v8.16b, \in0\().16b, v12.16b
120         sub             v9.16b, \in1\().16b, v12.16b
121         tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
122         tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
123         sub             v10.16b, v8.16b, v12.16b
124         sub             v11.16b, v9.16b, v12.16b
125         tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
126         tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
127         sub             v8.16b, v10.16b, v12.16b
128         sub             v9.16b, v11.16b, v12.16b
129         tbx             \in0\().16b, {v24.16b-v27.16b}, v10.16b
130         tbx             \in1\().16b, {v24.16b-v27.16b}, v11.16b
131         tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
132         tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
133         .endm
134
        /* four-state interleaved version of sub_bytes; the index registers
         * v8-v11 are re-biased in place between lookups.  Clobbers v8-v11. */
135         .macro          sub_bytes_4x, in0, in1, in2, in3
136         sub             v8.16b, \in0\().16b, v12.16b
137         tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
138         sub             v9.16b, \in1\().16b, v12.16b
139         tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
140         sub             v10.16b, \in2\().16b, v12.16b
141         tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
142         sub             v11.16b, \in3\().16b, v12.16b
143         tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
144         tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
145         tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
146         sub             v8.16b, v8.16b, v12.16b
147         tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
148         sub             v9.16b, v9.16b, v12.16b
149         tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
150         sub             v10.16b, v10.16b, v12.16b
151         tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b
152         sub             v11.16b, v11.16b, v12.16b
153         tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
154         sub             v8.16b, v8.16b, v12.16b
155         tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
156         sub             v9.16b, v9.16b, v12.16b
157         tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
158         sub             v10.16b, v10.16b, v12.16b
159         tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
160         sub             v11.16b, v11.16b, v12.16b
161         tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
162         tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
163         tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
164         .endm
165
        /* two-state interleaved version of mul_by_x (GF(2^8) doubling of
         * \in0/\in1 into \out0/\out1); \const holds 0x1b in every lane */
166         .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
167         sshr            \tmp0\().16b, \in0\().16b,  #7          /* 0xff where MSB set */
168         add             \out0\().16b, \in0\().16b,  \in0\().16b /* in0 << 1 */
169         sshr            \tmp1\().16b, \in1\().16b,  #7
170         and             \tmp0\().16b, \tmp0\().16b, \const\().16b
171         add             \out1\().16b, \in1\().16b,  \in1\().16b /* in1 << 1 */
172         and             \tmp1\().16b, \tmp1\().16b, \const\().16b
173         eor             \out0\().16b, \out0\().16b, \tmp0\().16b /* conditional reduction */
174         eor             \out1\().16b, \out1\().16b, \tmp1\().16b
175         .endm
176
        /* two-state interleaved version of mix_columns.  NOTE: clobbers
         * v8-v13, i.e. including the constants in v12 (0x40 tbx bias) and
         * v13 (ShiftRows vector) - callers must restore those afterwards,
         * which is why the do_block_2x/4x loops reload them each round. */
177         .macro          mix_columns_2x, in0, in1
178         mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v14       /* v8/v9 = 2.x */
179         rev32           v10.8h, \in0\().8h                      /* x ror32 #16 */
180         rev32           v11.8h, \in1\().8h
181         eor             \in0\().16b, v8.16b, \in0\().16b        /* in0 = 3.x */
182         eor             \in1\().16b, v9.16b, \in1\().16b        /* in1 = 3.x */
183         shl             v12.4s, v10.4s, #24
184         shl             v13.4s, v11.4s, #24
185         eor             v8.16b, v8.16b, v10.16b
186         sri             v12.4s, v10.4s, #8                      /* v10 ror32 #8 */
187         shl             v10.4s, \in0\().4s, #24
188         eor             v9.16b, v9.16b, v11.16b
189         sri             v13.4s, v11.4s, #8                      /* v11 ror32 #8 */
190         shl             v11.4s, \in1\().4s, #24
191         sri             v10.4s, \in0\().4s, #8                  /* 3.x ror32 #8 */
192         eor             \in0\().16b, v8.16b, v12.16b
193         sri             v11.4s, \in1\().4s, #8
194         eor             \in1\().16b, v9.16b, v13.16b
195         eor             \in0\().16b, v10.16b, \in0\().16b
196         eor             \in1\().16b, v11.16b, \in1\().16b
197         .endm
198
        /* two-state interleaved version of inv_mix_columns: { 5, 0, 4, 0 }
         * pre-multiply followed by forward MixColumns.  Clobbers v8-v13
         * (v12/v13 via mix_columns_2x). */
199         .macro          inv_mix_cols_2x, in0, in1
200         mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v14       /* 2.x */
201         mul_by_x_2x     v8, v9, v8, v9, v10, v11, v14           /* 4.x */
202         eor             \in0\().16b, \in0\().16b, v8.16b        /* 5.x */
203         eor             \in1\().16b, \in1\().16b, v9.16b
204         rev32           v8.8h, v8.8h                            /* 4.x ror32 #16 */
205         rev32           v9.8h, v9.8h
206         eor             \in0\().16b, \in0\().16b, v8.16b
207         eor             \in1\().16b, \in1\().16b, v9.16b
208         mix_columns_2x  \in0, \in1
209         .endm
210
        /* four-state version of inv_mix_columns: the { 5, 0, 4, 0 }
         * pre-multiply is done 4-wide, then the forward MixColumns is
         * applied as two 2x passes.  Clobbers v8-v13 (v12/v13 are used
         * as mul_by_x_2x scratch and by mix_columns_2x). */
211         .macro          inv_mix_cols_4x, in0, in1, in2, in3
212         mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v14       /* 2.x */
213         mul_by_x_2x     v10, v11, \in2, \in3, v12, v13, v14
214         mul_by_x_2x     v8, v9, v8, v9, v12, v13, v14           /* 4.x */
215         mul_by_x_2x     v10, v11, v10, v11, v12, v13, v14
216         eor             \in0\().16b, \in0\().16b, v8.16b        /* 5.x */
217         eor             \in1\().16b, \in1\().16b, v9.16b
218         eor             \in2\().16b, \in2\().16b, v10.16b
219         eor             \in3\().16b, \in3\().16b, v11.16b
220         rev32           v8.8h, v8.8h                            /* 4.x ror32 #16 */
221         rev32           v9.8h, v9.8h
222         rev32           v10.8h, v10.8h
223         rev32           v11.8h, v11.8h
224         eor             \in0\().16b, \in0\().16b, v8.16b
225         eor             \in1\().16b, \in1\().16b, v9.16b
226         eor             \in2\().16b, \in2\().16b, v10.16b
227         eor             \in3\().16b, \in3\().16b, v11.16b
228         mix_columns_2x  \in0, \in1
229         mix_columns_2x  \in2, \in3
230         .endm
231
232         .macro          do_block_2x, enc, in0, in1 rounds, rk, rkp, i
233         ld1             {v15.4s}, [\rk]
234         add             \rkp, \rk, #16
235         mov             \i, \rounds
236 1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
237         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
238         sub_bytes_2x    \in0, \in1
239         tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
240         tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
241         ld1             {v15.4s}, [\rkp], #16
242         subs            \i, \i, #1
243         beq             2222f
244         .if             \enc == 1
245         mix_columns_2x  \in0, \in1
246         ldr             q13, .LForward_ShiftRows
247         .else
248         inv_mix_cols_2x \in0, \in1
249         ldr             q13, .LReverse_ShiftRows
250         .endif
251         movi            v12.16b, #0x40
252         b               1111b
253 2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
254         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
255         .endm
256
        /*
         * Run 4 interleaved AES states through all rounds; same structure
         * as do_block_2x but 4-wide.  \enc is an assemble-time constant
         * (1 = encrypt, 0 = decrypt); \in0-\in3 are transformed in place;
         * \rk points to the expanded key schedule; \rkp/\i are scratch GP
         * regs.  v15 stages the current round key; v12/v13 are reloaded
         * every iteration because the MixColumns helpers clobber them.
         */
257         .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
258         ld1             {v15.4s}, [\rk]                         /* first round key */
259         add             \rkp, \rk, #16
260         mov             \i, \rounds
261 1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
262         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
263         eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
264         eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
265         sub_bytes_4x    \in0, \in1, \in2, \in3
266         tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
267         tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
268         tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
269         tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
270         ld1             {v15.4s}, [\rkp], #16                   /* next round key */
271         subs            \i, \i, #1
272         beq             2222f                                   /* final round: no MixColumns */
273         .if             \enc == 1
274         mix_columns_2x  \in0, \in1
275         mix_columns_2x  \in2, \in3
276         ldr             q13, .LForward_ShiftRows                /* v13 clobbered above */
277         .else
278         inv_mix_cols_4x \in0, \in1, \in2, \in3
279         ldr             q13, .LReverse_ShiftRows                /* v13 clobbered above */
280         .endif
281         movi            v12.16b, #0x40                          /* v12 clobbered above */
282         b               1111b
283 2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
284         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
285         eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
286         eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
287         .endm
288
        /* encrypt 2 interleaved AES states with the key schedule at \rk */
289         .macro          encrypt_block2x, in0, in1, rounds, rk, rkp, i
290         do_block_2x     1, \in0, \in1, \rounds, \rk, \rkp, \i
291         .endm
292
        /* decrypt 2 interleaved AES states with the key schedule at \rk */
293         .macro          decrypt_block2x, in0, in1, rounds, rk, rkp, i
294         do_block_2x     0, \in0, \in1, \rounds, \rk, \rkp, \i
295         .endm
296
        /* encrypt 4 interleaved AES states with the key schedule at \rk */
297         .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
298         do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
299         .endm
300
        /* decrypt 4 interleaved AES states with the key schedule at \rk */
301         .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
302         do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
303         .endm
304
305 #include "aes-modes.S"
306
307         .text
308         .align          4
        /*
         * tbl permutation vectors implementing ShiftRows / InvShiftRows.
         * The CPU_BE variants are the CPU_LE tables in reverse byte order,
         * matching the lane order of vectors on a big-endian kernel.
         */
309 .LForward_ShiftRows:
310 CPU_LE( .byte           0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3  )
311 CPU_LE( .byte           0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb  )
312 CPU_BE( .byte           0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8  )
313 CPU_BE( .byte           0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0  )
314
315 .LReverse_ShiftRows:
316 CPU_LE( .byte           0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb  )
317 CPU_LE( .byte           0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3  )
318 CPU_BE( .byte           0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8  )
319 CPU_BE( .byte           0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0  )
320
        /* the 256-byte AES forward S-box, loaded into v16-v31 by prepare */
321 .LForward_Sbox:
322         .byte           0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
323         .byte           0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
324         .byte           0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
325         .byte           0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
326         .byte           0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
327         .byte           0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
328         .byte           0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
329         .byte           0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
330         .byte           0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
331         .byte           0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
332         .byte           0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
333         .byte           0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
334         .byte           0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
335         .byte           0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
336         .byte           0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
337         .byte           0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
338         .byte           0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
339         .byte           0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
340         .byte           0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
341         .byte           0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
342         .byte           0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
343         .byte           0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
344         .byte           0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
345         .byte           0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
346         .byte           0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
347         .byte           0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
348         .byte           0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
349         .byte           0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
350         .byte           0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
351         .byte           0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
352         .byte           0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
353         .byte           0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
354
        /* the 256-byte AES inverse S-box, loaded into v16-v31 by prepare */
355 .LReverse_Sbox:
356         .byte           0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
357         .byte           0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
358         .byte           0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
359         .byte           0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
360         .byte           0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
361         .byte           0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
362         .byte           0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
363         .byte           0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
364         .byte           0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
365         .byte           0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
366         .byte           0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
367         .byte           0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
368         .byte           0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
369         .byte           0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
370         .byte           0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
371         .byte           0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
372         .byte           0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
373         .byte           0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
374         .byte           0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
375         .byte           0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
376         .byte           0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
377         .byte           0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
378         .byte           0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
379         .byte           0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
380         .byte           0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
381         .byte           0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
382         .byte           0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
383         .byte           0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
384         .byte           0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
385         .byte           0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
386         .byte           0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
387         .byte           0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d