/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8

	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
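	@ NOTE: an aese.8/aesmc.8 (or aesd.8/aesimc.8) pair performs one
	@ full AES round: AddRoundKey, SubBytes and ShiftRows in the first
	@ instruction, (Inv)MixColumns in the second. Keeping the two
	@ adjacent also lets cores that fuse the pair issue them as one op.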
	.macro		enc_dround, key1, key2
	.macro		dec_dround, key1, key2
	.macro		enc_fround, key1, key2, key3
	.macro		dec_fround, key1, key2, key3
	.macro		enc_dround_3x, key1, key2
	.macro		dec_dround_3x, key1, key2
	.macro		enc_fround_3x, key1, key2, key3
	.macro		dec_fround_3x, key1, key2, key3
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\dround		q12, q13
	\fround		q10, q11, q14
	bx		lr
	.endm
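	@ The loads above interleave with the rounds so that the next pair
	@ of round keys streams into q10-q13 while the previous pair is
	@ being consumed, and the cmp/blo/beq dispatch on r3 selects 10, 12
	@ or 14 rounds without needing a loop.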
	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2 and ip
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   ip        : address of 3rd round key
	 *   q14       : final round key
	 *   r3        : number of rounds
	 */
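	/*
	 * As usual, the round count in r3 encodes the key size: 10 rounds
	 * for AES-128, 12 for AES-192 and 14 for AES-256, which is what the
	 * 'cmp r3, #12' dispatch in do_block above relies on.
	 */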
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm
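	@ Round keys are stored 16 bytes apart, so rk + (rounds << 4) is
	@ the address of round key #rounds, i.e. the final round key.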
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks)
	 */
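	/*
	 * Note that the modes below handle three blocks per iteration where
	 * they can: the _3x core transforms keep three independent AES
	 * streams in flight, so the latency of the AES instructions can
	 * overlap.
	 */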
ENTRY(ce_aes_ecb_encrypt)
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vld1.8		{q0}, [r1, :64]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
ENDPROC(ce_aes_ecb_encrypt)
ENTRY(ce_aes_ecb_decrypt)
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vld1.8		{q0}, [r1, :64]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
ENDPROC(ce_aes_ecb_decrypt)
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q1}, [r1, :64]!	@ get next pt block
	veor		q0, q0, q1		@ ...and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
ENDPROC(ce_aes_cbc_encrypt)
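	@ NOTE: CBC encryption is inherently serial (each plaintext block
	@ must be xor'ed with the previous ciphertext before it can be
	@ encrypted), so there is no 3x interleaved path here, unlike in
	@ the decrypt direction below.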
ENTRY(ce_aes_cbc_decrypt)
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ keep iv in q6
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vmov		q15, q14		@ preserve last round key
	vld1.8		{q0}, [r1, :64]!	@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0			@ ct is next block's iv
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
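	@ NOTE: the 'veor q14, q15, q6' above folds the CBC xor into the
	@ final round: the core transform ends with a veor against q14, so
	@ presenting 'last round key ^ previous ct' there makes aes_decrypt
	@ emit the plaintext directly, with no extra xor per block.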
	vst1.8		{q6}, [r5]		@ return iv
ENDPROC(ce_aes_cbc_decrypt)
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ load ctr
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
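	@ If adding the block count to the swabbed low counter word could
	@ wrap around 32 bits, the 3x fast path (which only ever increments
	@ r6) must be avoided and every block goes through the single-block
	@ loop with full carry handling in .Lctrcarry.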
	vld1.8		{q3-q4}, [r1, :64]!
	vld1.8		{q5}, [r1, :64]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
	vld1.8		{q3}, [r1, :64]!
	veor		q3, q0, q3		@ xor pt with keystream
	vst1.8		{q3}, [r0, :64]!
	adds		r6, r6, #1		@ increment BE ctr
.Lctrhalfblock:
	vld1.8		{d1}, [r1, :64]
	veor		d0, d0, d1
	vst1.8		{d0}, [r0, :64]
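	@ A trailing half block is signalled by a negative block count (the
	@ bmi above): only the low eight keystream bytes in d0 are consumed.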
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
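	@ Each word of the big endian counter is byte reversed, incremented
	@ and stored back, moving from s26 towards s24 (the most significant
	@ word) for as long as the increment keeps carrying.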
ENDPROC(ce_aes_ctr_encrypt)
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm

	.align		3
.Lxts_mul_x:
	.quad		1, 0x87
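	@ next_tweak doubles the tweak in GF(2^128) with the XTS modulus
	@ x^128 + x^7 + x^2 + x + 1: vadd.u64 doubles each 64-bit half, the
	@ carry out of the low half is re-injected as +1 into the high half,
	@ and the carry out of bit 127 is reduced back into the low byte as
	@ 0x87. The vext swaps the two masked .Lxts_mul_x constants into the
	@ opposite lanes to line the carries up.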
ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8
	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]		@ load 'first' arg
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr
	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
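	@ The encrypted IV serves as the initial XTS tweak; tweaks for the
	@ subsequent blocks are then derived from it using next_tweak.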
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)
ENTRY(ce_aes_xts_encrypt)
	bl		ce_aes_xts_init		@ run shared prologue
	teq		r6, #0			@ start of a block?
	next_tweak	q3, q3, q7, q6
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	next_tweak	q5, q4, q7, q6
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
	vst1.8		{q2}, [r0, :64]!
	vld1.8		{q0}, [r1, :64]!
	vst1.8		{q0}, [r0, :64]!
	next_tweak	q3, q3, q7, q6
ENDPROC(ce_aes_xts_encrypt)
ENTRY(ce_aes_xts_decrypt)
	bl		ce_aes_xts_init		@ run shared prologue
	teq		r6, #0			@ start of a block?
	next_tweak	q3, q3, q7, q6
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	next_tweak	q5, q4, q7, q6
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
	vst1.8		{q2}, [r0, :64]!
	vld1.8		{q0}, [r1, :64]!
	add		ip, r2, #32		@ 3rd round key
	vst1.8		{q0}, [r0, :64]!
	next_tweak	q3, q3, q7, q6
ENDPROC(ce_aes_xts_decrypt)
	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *			       AES sbox substitution on each byte in 'input'
	 */
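	@ NOTE: aese computes SubBytes(ShiftRows(state ^ key)), so with a
	@ zeroed state and the input word replicated across the key operand,
	@ ShiftRows only permutes identical bytes and the first result word
	@ is the plain sbox substitution of the input.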
	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *					   operation on round key *src
	 */
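	@ NOTE: aesd implements the equivalent inverse cipher, which wants
	@ InvMixColumns applied to every round key except the first and the
	@ last; the key setup code presumably calls this helper once per
	@ affected round key when deriving the decryption schedule.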
ENDPROC(ce_aes_invert)