Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / arch / powerpc / crypto / aes-spe-modes.S
1 /*
2  * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
3  *
4  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option)
9  * any later version.
10  *
11  */
12
13 #include <asm/ppc_asm.h>
14 #include "aes-spe-regs.h"
15
16 #ifdef __BIG_ENDIAN__                   /* Macros for big endian builds */
17
18 #define LOAD_DATA(reg, off) \
19         lwz             reg,off(rSP);   /* load word at rSP + off       */
20 #define SAVE_DATA(reg, off) \
21         stw             reg,off(rDP);   /* store word at rDP + off      */
22 #define NEXT_BLOCK \
23         addi            rSP,rSP,16;     /* advance both data pointers   */ \
24         addi            rDP,rDP,16;     /* by one 16 byte block         */
25 #define LOAD_IV(reg, off) \
26         lwz             reg,off(rIP);   /* load IV word at rIP + off    */
27 #define SAVE_IV(reg, off) \
28         stw             reg,off(rIP);   /* store IV word at rIP + off   */
29 #define START_IV                        /* rIP never moves: no reset    */
30 #define CBC_DEC 16                      /* CBC decrement per block      */
31 #define CTR_DEC 1                       /* CTR decrement one byte       */
32
33 #else                                   /* Macros for little endian     */
34
35 #define LOAD_DATA(reg, off) \
36         lwbrx           reg,0,rSP;      /* load reversed (off unused)   */ \
37         addi            rSP,rSP,4;      /* and increment pointer        */
38 #define SAVE_DATA(reg, off) \
39         stwbrx          reg,0,rDP;      /* save reversed (off unused)   */ \
40         addi            rDP,rDP,4;      /* and increment pointer        */
41 #define NEXT_BLOCK                      /* nothing to do: already moved */
42 #define LOAD_IV(reg, off) \
43         lwbrx           reg,0,rIP;      /* load reversed (off unused)   */ \
44         addi            rIP,rIP,4;      /* and increment pointer        */
45 #define SAVE_IV(reg, off) \
46         stwbrx          reg,0,rIP;      /* save reversed (off unused)   */ \
47         addi            rIP,rIP,4;      /* and increment pointer        */
48 #define START_IV \
49         subi            rIP,rIP,16;     /* undo four 4 byte increments  */
50 #define CBC_DEC 32                      /* 2 blocks because of incs     */
51 #define CTR_DEC 17                      /* 1 byte + 1 block of incs     */
52
53 #endif
54
55 #define SAVE_0_REGS                     /* nothing extra to save        */
56 #define LOAD_0_REGS                     /* nothing extra to restore     */
57
58 #define SAVE_4_REGS \
59         stw             rI0,96(r1);     /* save rI0-rI3 as 32 bit words */ \
60         stw             rI1,100(r1);    /* in frame bytes 96..111       */ \
61         stw             rI2,104(r1);                                       \
62         stw             rI3,108(r1);
63
64 #define LOAD_4_REGS \
65         lwz             rI0,96(r1);     /* reload rI0-rI3 from frame    */ \
66         lwz             rI1,100(r1);    /* bytes 96..111 (SAVE_4_REGS)  */ \
67         lwz             rI2,104(r1);                                       \
68         lwz             rI3,108(r1);
69
70 #define SAVE_8_REGS \
71         SAVE_4_REGS                                                        \
72         stw             rG0,112(r1);    /* additionally save rG0-rG3    */ \
73         stw             rG1,116(r1);    /* in frame bytes 112..127      */ \
74         stw             rG2,120(r1);                                       \
75         stw             rG3,124(r1);
76
77 #define LOAD_8_REGS \
78         LOAD_4_REGS                                                        \
79         lwz             rG0,112(r1);    /* additionally reload rG0-rG3  */ \
80         lwz             rG1,116(r1);    /* from frame bytes 112..127    */ \
81         lwz             rG2,120(r1);                                       \
82         lwz             rG3,124(r1);
83
84 #define INITIALIZE_CRYPT(tab,nr32bitregs) \
85         mflr            r0;             /* fetch return address         */ \
86         stwu            r1,-160(r1);    /* create 160 byte stack frame  */ \
87         lis             rT0,tab@h;      /* en-/decryption table pointer */ \
88         stw             r0,8(r1);       /* save link register           */ \
89         ori             rT0,rT0,tab@l;                                     \
90         evstdw          r14,16(r1);                                        \
91         mr              rKS,rKP;        /* keep a copy of key pointer   */ \
92         evstdw          r15,24(r1);     /* We must save non volatile    */ \
93         evstdw          r16,32(r1);     /* registers. Take the chance   */ \
94         evstdw          r17,40(r1);     /* and save the SPE part too    */ \
95         evstdw          r18,48(r1);     /* (64 bit stores of r14-r23    */ \
96         evstdw          r19,56(r1);     /* into frame bytes 16..95)     */ \
97         evstdw          r20,64(r1);                                        \
98         evstdw          r21,72(r1);                                        \
99         evstdw          r22,80(r1);                                        \
100         evstdw          r23,88(r1);                                        \
101         SAVE_##nr32bitregs##_REGS       /* SAVE_0/4/8_REGS by argument  */
102
103 #define FINALIZE_CRYPT(nr32bitregs) \
104         lwz             r0,8(r1);       /* saved link register value    */ \
105         evldw           r14,16(r1);     /* restore SPE registers        */ \
106         evldw           r15,24(r1);                                        \
107         evldw           r16,32(r1);                                        \
108         evldw           r17,40(r1);                                        \
109         evldw           r18,48(r1);                                        \
110         evldw           r19,56(r1);                                        \
111         evldw           r20,64(r1);                                        \
112         evldw           r21,72(r1);                                        \
113         evldw           r22,80(r1);                                        \
114         evldw           r23,88(r1);                                        \
115         LOAD_##nr32bitregs##_REGS                                          \
116         mtlr            r0;             /* restore link register        */ \
117         xor             r0,r0,r0;       /* r0 = 0                       */ \
118         stw             r0,16(r1);      /* delete sensitive data        */ \
119         stw             r0,24(r1);      /* that we might have pushed    */ \
120         stw             r0,32(r1);      /* from other context that runs */ \
121         stw             r0,40(r1);      /* the same code                */ \
122         stw             r0,48(r1);      /* (32 bit stores: only the     */ \
123         stw             r0,56(r1);      /* first word of each 8 byte    */ \
124         stw             r0,64(r1);      /* save slot is overwritten)    */ \
125         stw             r0,72(r1);                                         \
126         stw             r0,80(r1);                                         \
127         stw             r0,88(r1);                                         \
128         addi            r1,r1,160;      /* cleanup stack frame          */
129
130 #define ENDIAN_SWAP(t0, t1, s0, s1) \
131         rotrwi          t0,s0,8;        /* swap endianness for 2 GPRs   */ \
132         rotrwi          t1,s1,8;        /* rotr 8: bits 0-7 and 16-23   */ \
133         rlwimi          t0,s0,8,8,15;   /* rotl 8: insert bits 8-15     */ \
134         rlwimi          t1,s1,8,8,15;                                      \
135         rlwimi          t0,s0,8,24,31;  /* rotl 8: insert bits 24-31    */ \
136         rlwimi          t1,s1,8,24,31;
137
138 #define GF128_MUL(d0, d1, d2, d3, t0) \
139         li              t0,0x87;        /* multiplication in GF128      */ \
140         cmpwi           d3,-1;          /* d3 > -1: top bit is clear    */ \
141         iselgt          t0,0,t0;        /* then t0 = 0, else keep 0x87  */ \
142         rlwimi          d3,d2,0,0,0;    /* propagate "carry" bits       */ \
143         rotlwi          d3,d3,1;        /* d3 = d3 << 1 | top bit of d2 */ \
144         rlwimi          d2,d1,0,0,0;                                       \
145         rotlwi          d2,d2,1;                                           \
146         rlwimi          d1,d0,0,0,0;                                       \
147         slwi            d0,d0,1;        /* shift left 128 bit           */ \
148         rotlwi          d1,d1,1;                                           \
149         xor             d0,d0,t0;       /* fold t0 into the lowest word */
150
151 #define START_KEY(d0, d1, d2, d3) \
152         lwz             rW0,0(rKP);     /* rW0-rW3 = 4 words at rKP     */ \
153         mtctr           rRR;            /* load count register from rRR */ \
154         lwz             rW1,4(rKP);                                        \
155         lwz             rW2,8(rKP);                                        \
156         lwz             rW3,12(rKP);                                       \
157         xor             rD0,d0,rW0;     /* rD0-rD3 = d0-d3 ^ rW0-rW3    */ \
158         xor             rD1,d1,rW1;                                        \
159         xor             rD2,d2,rW2;                                        \
160         xor             rD3,d3,rW3;
161
162 /*
163  * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
164  *                 u32 rounds)
165  *
166  * Called from the glue layer to encrypt a single 16 byte block.
167  * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
168  * The block is read through rSP and written through rDP.
169  */
170 _GLOBAL(ppc_encrypt_aes)
171         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
172         LOAD_DATA(rD0, 0)               /* rD0-rD3 = input block        */
173         LOAD_DATA(rD1, 4)
174         LOAD_DATA(rD2, 8)
175         LOAD_DATA(rD3, 12)
176         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
177         bl              ppc_encrypt_block       /* routine not shown here */
178         xor             rD0,rD0,rW0     /* XOR result with rW0-rW3      */
179         SAVE_DATA(rD0, 0)               /* and store it word by word    */
180         xor             rD1,rD1,rW1
181         SAVE_DATA(rD1, 4)
182         xor             rD2,rD2,rW2
183         SAVE_DATA(rD2, 8)
184         xor             rD3,rD3,rW3
185         SAVE_DATA(rD3, 12)
186         FINALIZE_CRYPT(0)               /* restore regs, drop the frame */
187         blr
188
189 /*
190  * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
191  *                 u32 rounds)
192  *
193  * Called from the glue layer to decrypt a single 16 byte block.
194  * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
195  * The block is read through rSP and written through rDP.
196  */
197 _GLOBAL(ppc_decrypt_aes)
198         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
199         LOAD_DATA(rD0, 0)               /* rD0-rD3 = input block        */
200         addi            rT1,rT0,4096    /* rT1 = table pointer + 4096   */
201         LOAD_DATA(rD1, 4)
202         LOAD_DATA(rD2, 8)
203         LOAD_DATA(rD3, 12)
204         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
205         bl              ppc_decrypt_block       /* routine not shown here */
206         xor             rD0,rD0,rW0     /* XOR result with rW0-rW3      */
207         SAVE_DATA(rD0, 0)               /* and store it word by word    */
208         xor             rD1,rD1,rW1
209         SAVE_DATA(rD1, 4)
210         xor             rD2,rD2,rW2
211         SAVE_DATA(rD2, 8)
212         xor             rD3,rD3,rW3
213         SAVE_DATA(rD3, 12)
214         FINALIZE_CRYPT(0)               /* restore regs, drop the frame */
215         blr
216
217 /*
218  * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
219  *                 u32 rounds, u32 bytes);
220  *
221  * Called from the glue layer to encrypt multiple blocks via ECB.
222  * Bytes must be at least 16 and only whole blocks are
223  * processed. Round values are AES128 = 4, AES192 = 5 and
224  * AES256 = 6.
225  * The loop repeats while more than 15 bytes remain in rLN.
226  */
227 _GLOBAL(ppc_encrypt_ecb)
228         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
229 ppc_encrypt_ecb_loop:
230         LOAD_DATA(rD0, 0)               /* rD0-rD3 = next input block   */
231         mr              rKP,rKS         /* restart from saved key ptr   */
232         LOAD_DATA(rD1, 4)
233         subi            rLN,rLN,16      /* one block less to go         */
234         LOAD_DATA(rD2, 8)
235         cmpwi           rLN,15          /* tested by the bt at loop end */
236         LOAD_DATA(rD3, 12)
237         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
238         bl              ppc_encrypt_block       /* routine not shown here */
239         xor             rD0,rD0,rW0     /* XOR result with rW0-rW3      */
240         SAVE_DATA(rD0, 0)               /* and store it word by word    */
241         xor             rD1,rD1,rW1
242         SAVE_DATA(rD1, 4)
243         xor             rD2,rD2,rW2
244         SAVE_DATA(rD2, 8)
245         xor             rD3,rD3,rW3
246         SAVE_DATA(rD3, 12)
247         NEXT_BLOCK
248         bt              gt,ppc_encrypt_ecb_loop /* rLN > 15: next block */
249         FINALIZE_CRYPT(0)               /* restore regs, drop the frame */
250         blr
251
252 /*
253  * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
254  *                 u32 rounds, u32 bytes);
255  *
256  * Called from the glue layer to decrypt multiple blocks via ECB.
257  * Bytes must be at least 16 and only whole blocks are
258  * processed. Round values are AES128 = 4, AES192 = 5 and
259  * AES256 = 6.
260  * The loop repeats while more than 15 bytes remain in rLN.
261  */
262 _GLOBAL(ppc_decrypt_ecb)
263         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
264         addi            rT1,rT0,4096    /* rT1 = table pointer + 4096   */
265 ppc_decrypt_ecb_loop:
266         LOAD_DATA(rD0, 0)               /* rD0-rD3 = next input block   */
267         mr              rKP,rKS         /* restart from saved key ptr   */
268         LOAD_DATA(rD1, 4)
269         subi            rLN,rLN,16      /* one block less to go         */
270         LOAD_DATA(rD2, 8)
271         cmpwi           rLN,15          /* tested by the bt at loop end */
272         LOAD_DATA(rD3, 12)
273         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
274         bl              ppc_decrypt_block       /* routine not shown here */
275         xor             rD0,rD0,rW0     /* XOR result with rW0-rW3      */
276         SAVE_DATA(rD0, 0)               /* and store it word by word    */
277         xor             rD1,rD1,rW1
278         SAVE_DATA(rD1, 4)
279         xor             rD2,rD2,rW2
280         SAVE_DATA(rD2, 8)
281         xor             rD3,rD3,rW3
282         SAVE_DATA(rD3, 12)
283         NEXT_BLOCK
284         bt              gt,ppc_decrypt_ecb_loop /* rLN > 15: next block */
285         FINALIZE_CRYPT(0)               /* restore regs, drop the frame */
286         blr
287
288 /*
289  * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
290  *                 u32 rounds, u32 bytes, u8 *iv);
291  *
292  * Called from the glue layer to encrypt multiple blocks via CBC.
293  * Bytes must be at least 16 and only whole blocks are
294  * processed. Round values are AES128 = 4, AES192 = 5 and
295  * AES256 = 6.
296  * The last output block is written back through rIP as the new IV.
297  */
298 _GLOBAL(ppc_encrypt_cbc)
299         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
300         LOAD_IV(rI0, 0)                 /* rI0-rI3 = IV                 */
301         LOAD_IV(rI1, 4)
302         LOAD_IV(rI2, 8)
303         LOAD_IV(rI3, 12)
304 ppc_encrypt_cbc_loop:
305         LOAD_DATA(rD0, 0)               /* rD0-rD3 = next input block   */
306         mr              rKP,rKS         /* restart from saved key ptr   */
307         LOAD_DATA(rD1, 4)
308         subi            rLN,rLN,16      /* one block less to go         */
309         LOAD_DATA(rD2, 8)
310         cmpwi           rLN,15          /* tested by the bt at loop end */
311         LOAD_DATA(rD3, 12)
312         xor             rD0,rD0,rI0     /* chain: XOR input with the IV */
313         xor             rD1,rD1,rI1     /* or the previous output block */
314         xor             rD2,rD2,rI2
315         xor             rD3,rD3,rI3
316         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
317         bl              ppc_encrypt_block       /* routine not shown here */
318         xor             rI0,rD0,rW0     /* output block stays in rI0-3  */
319         SAVE_DATA(rI0, 0)               /* as chaining value for next   */
320         xor             rI1,rD1,rW1
321         SAVE_DATA(rI1, 4)
322         xor             rI2,rD2,rW2
323         SAVE_DATA(rI2, 8)
324         xor             rI3,rD3,rW3
325         SAVE_DATA(rI3, 12)
326         NEXT_BLOCK
327         bt              gt,ppc_encrypt_cbc_loop /* rLN > 15: next block */
328         START_IV                        /* rIP back to start of the IV  */
329         SAVE_IV(rI0, 0)                 /* new IV = last output block   */
330         SAVE_IV(rI1, 4)
331         SAVE_IV(rI2, 8)
332         SAVE_IV(rI3, 12)
333         FINALIZE_CRYPT(4)               /* restore regs, drop the frame */
334         blr
335
336 /*
337  * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
338  *                 u32 rounds, u32 bytes, u8 *iv);
339  *
340  * Called from the glue layer to decrypt multiple blocks via CBC.
341  * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
342  * Blocks are handled last to first; the last input block is stored as the new IV before any output is written.
343  */
344 _GLOBAL(ppc_decrypt_cbc)
345         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
346         li              rT1,15          /* mask of the low four bits    */
347         LOAD_IV(rI0, 0)                 /* rI0-rI3 = initial IV         */
348         andc            rLN,rLN,rT1     /* round down to whole blocks   */
349         LOAD_IV(rI1, 4)
350         subi            rLN,rLN,16      /* rLN = offset of last block   */
351         LOAD_IV(rI2, 8)
352         add             rSP,rSP,rLN     /* reverse processing           */
353         LOAD_IV(rI3, 12)
354         add             rDP,rDP,rLN
355         LOAD_DATA(rD0, 0)               /* rD0-rD3 = last input block   */
356         addi            rT1,rT0,4096    /* rT1 = table pointer + 4096   */
357         LOAD_DATA(rD1, 4)
358         LOAD_DATA(rD2, 8)
359         LOAD_DATA(rD3, 12)
360         START_IV                        /* rIP back to start of the IV  */
361         SAVE_IV(rD0, 0)                 /* new IV = last input block    */
362         SAVE_IV(rD1, 4)
363         SAVE_IV(rD2, 8)
364         cmpwi           rLN,16          /* is there only one block?     */
365         SAVE_IV(rD3, 12)
366         bt              lt,ppc_decrypt_cbc_end
367 ppc_decrypt_cbc_loop:
368         mr              rKP,rKS         /* restart from saved key ptr   */
369         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
370         bl              ppc_decrypt_block       /* routine not shown here */
371         subi            rLN,rLN,16      /* one block less to go         */
372         subi            rSP,rSP,CBC_DEC /* rSP -> preceding input block */
373         xor             rW0,rD0,rW0     /* rW0-rW3 = block result       */
374         LOAD_DATA(rD0, 0)               /* rD0-rD3 = preceding block    */
375         xor             rW1,rD1,rW1
376         LOAD_DATA(rD1, 4)
377         xor             rW2,rD2,rW2
378         LOAD_DATA(rD2, 8)
379         xor             rW3,rD3,rW3
380         LOAD_DATA(rD3, 12)
381         xor             rW0,rW0,rD0     /* unchain with preceding block */
382         SAVE_DATA(rW0, 0)
383         xor             rW1,rW1,rD1
384         SAVE_DATA(rW1, 4)
385         xor             rW2,rW2,rD2
386         SAVE_DATA(rW2, 8)
387         xor             rW3,rW3,rD3
388         SAVE_DATA(rW3, 12)
389         cmpwi           rLN,15          /* more blocks before this one? */
390         subi            rDP,rDP,CBC_DEC /* rDP -> preceding output slot */
391         bt              gt,ppc_decrypt_cbc_loop
392 ppc_decrypt_cbc_end:
393         mr              rKP,rKS         /* restart from saved key ptr   */
394         START_KEY(rD0, rD1, rD2, rD3)   /* first block of the input     */
395         bl              ppc_decrypt_block       /* routine not shown here */
396         xor             rW0,rW0,rD0     /* rW0-rW3 = block result       */
397         xor             rW1,rW1,rD1
398         xor             rW2,rW2,rD2
399         xor             rW3,rW3,rD3
400         xor             rW0,rW0,rI0     /* decrypt with initial IV      */
401         SAVE_DATA(rW0, 0)
402         xor             rW1,rW1,rI1
403         SAVE_DATA(rW1, 4)
404         xor             rW2,rW2,rI2
405         SAVE_DATA(rW2, 8)
406         xor             rW3,rW3,rI3
407         SAVE_DATA(rW3, 12)
408         FINALIZE_CRYPT(4)               /* restore regs, drop the frame */
409         blr
410
411 /*
412  * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
413  *               u32 rounds, u32 bytes, u8 *iv);
414  *
415  * Called from the glue layer to encrypt/decrypt multiple blocks
416  * via CTR. Number of bytes does not need to be a multiple of
417  * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6.
418  * A trailing partial block is XORed bytewise, using the IV buffer as scratch space for the key stream.
419  */
420 _GLOBAL(ppc_crypt_ctr)
421         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
422         LOAD_IV(rI0, 0)                 /* rI0-rI3 = counter block      */
423         LOAD_IV(rI1, 4)
424         LOAD_IV(rI2, 8)
425         cmpwi           rLN,16          /* less than one whole block?   */
426         LOAD_IV(rI3, 12)
427         START_IV                        /* rIP back to start of the IV  */
428         bt              lt,ppc_crypt_ctr_partial
429 ppc_crypt_ctr_loop:
430         mr              rKP,rKS         /* restart from saved key ptr   */
431         START_KEY(rI0, rI1, rI2, rI3)   /* the counter is the AES input */
432         bl              ppc_encrypt_block       /* routine not shown here */
433         xor             rW0,rD0,rW0     /* rW0-rW3 = key stream block   */
434         xor             rW1,rD1,rW1
435         xor             rW2,rD2,rW2
436         xor             rW3,rD3,rW3
437         LOAD_DATA(rD0, 0)               /* rD0-rD3 = next input block   */
438         subi            rLN,rLN,16      /* one block less to go         */
439         LOAD_DATA(rD1, 4)
440         LOAD_DATA(rD2, 8)
441         LOAD_DATA(rD3, 12)
442         xor             rD0,rD0,rW0     /* output = input ^ key stream  */
443         SAVE_DATA(rD0, 0)
444         xor             rD1,rD1,rW1
445         SAVE_DATA(rD1, 4)
446         xor             rD2,rD2,rW2
447         SAVE_DATA(rD2, 8)
448         xor             rD3,rD3,rW3
449         SAVE_DATA(rD3, 12)
450         addic           rI3,rI3,1       /* increase counter                     */
451         addze           rI2,rI2         /* carry ripples up to rI0      */
452         addze           rI1,rI1
453         addze           rI0,rI0
454         NEXT_BLOCK
455         cmpwi           rLN,15          /* another whole block left?    */
456         bt              gt,ppc_crypt_ctr_loop
457 ppc_crypt_ctr_partial:
458         cmpwi           rLN,0           /* no trailing bytes: finish    */
459         bt              eq,ppc_crypt_ctr_end
460         mr              rKP,rKS         /* restart from saved key ptr   */
461         START_KEY(rI0, rI1, rI2, rI3)   /* the counter is the AES input */
462         bl              ppc_encrypt_block       /* routine not shown here */
463         xor             rW0,rD0,rW0     /* key stream block goes into   */
464         SAVE_IV(rW0, 0)                 /* the IV buffer for byte reads */
465         xor             rW1,rD1,rW1
466         SAVE_IV(rW1, 4)
467         xor             rW2,rD2,rW2
468         SAVE_IV(rW2, 8)
469         xor             rW3,rD3,rW3
470         SAVE_IV(rW3, 12)
471         mtctr           rLN             /* loop once per remaining byte */
472         subi            rIP,rIP,CTR_DEC /* one byte before key stream   */
473         subi            rSP,rSP,1       /* bias by -1: lbzu/stbu below  */
474         subi            rDP,rDP,1       /* pre-increment the pointers   */
475 ppc_crypt_ctr_xorbyte:
476         lbzu            rW4,1(rIP)      /* bytewise xor for partial block       */
477         lbzu            rW5,1(rSP)
478         xor             rW4,rW4,rW5
479         stbu            rW4,1(rDP)
480         bdnz            ppc_crypt_ctr_xorbyte
481         subf            rIP,rLN,rIP     /* take back the rLN byte steps */
482         addi            rIP,rIP,1       /* and the -1 bias: start of IV */
483         addic           rI3,rI3,1       /* increase counter once more   */
484         addze           rI2,rI2
485         addze           rI1,rI1
486         addze           rI0,rI0
487 ppc_crypt_ctr_end:
488         SAVE_IV(rI0, 0)                 /* write counter back as new IV */
489         SAVE_IV(rI1, 4)
490         SAVE_IV(rI2, 8)
491         SAVE_IV(rI3, 12)
492         FINALIZE_CRYPT(4)               /* restore regs, drop the frame */
493         blr
494
495 /*
496  * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
497  *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
498  *
499  * Called from the glue layer to encrypt multiple blocks via XTS.
500  * If key_twk is given, the initial IV encryption will be
501  * processed too. Round values are AES128 = 4, AES192 = 5,
502  * AES256 = 6.
503  * The tweak lives in rI0-rI3; a byte swapped copy in rG0-rG3 feeds GF128_MUL. The final tweak is written back as IV.
504  */
505 _GLOBAL(ppc_encrypt_xts)
506         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
507         LOAD_IV(rI0, 0)                 /* rI0-rI3 = IV                 */
508         LOAD_IV(rI1, 4)
509         LOAD_IV(rI2, 8)
510         cmpwi           rKT,0           /* rKT == 0: skip IV encryption */
511         LOAD_IV(rI3, 12)
512         bt              eq,ppc_encrypt_xts_notweak
513         mr              rKP,rKT         /* encrypt the IV using rKT     */
514         START_KEY(rI0, rI1, rI2, rI3)
515         bl              ppc_encrypt_block       /* routine not shown here */
516         xor             rI0,rD0,rW0     /* rI0-rI3 = encrypted IV       */
517         xor             rI1,rD1,rW1
518         xor             rI2,rD2,rW2
519         xor             rI3,rD3,rW3
520 ppc_encrypt_xts_notweak:
521         ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG0-rG3 = byte swapped tweak */
522         ENDIAN_SWAP(rG2, rG3, rI2, rI3)
523 ppc_encrypt_xts_loop:
524         LOAD_DATA(rD0, 0)               /* rD0-rD3 = next input block   */
525         mr              rKP,rKS         /* restart from saved key ptr   */
526         LOAD_DATA(rD1, 4)
527         subi            rLN,rLN,16      /* one block less to go         */
528         LOAD_DATA(rD2, 8)
529         LOAD_DATA(rD3, 12)
530         xor             rD0,rD0,rI0     /* XOR tweak into the input     */
531         xor             rD1,rD1,rI1
532         xor             rD2,rD2,rI2
533         xor             rD3,rD3,rI3
534         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
535         bl              ppc_encrypt_block       /* routine not shown here */
536         xor             rD0,rD0,rW0     /* XOR result with rW0-rW3      */
537         xor             rD1,rD1,rW1
538         xor             rD2,rD2,rW2
539         xor             rD3,rD3,rW3
540         xor             rD0,rD0,rI0     /* XOR tweak into the output    */
541         SAVE_DATA(rD0, 0)
542         xor             rD1,rD1,rI1
543         SAVE_DATA(rD1, 4)
544         xor             rD2,rD2,rI2
545         SAVE_DATA(rD2, 8)
546         xor             rD3,rD3,rI3
547         SAVE_DATA(rD3, 12)
548         GF128_MUL(rG0, rG1, rG2, rG3, rW0)      /* next tweak, rW0 is scratch */
549         ENDIAN_SWAP(rI0, rI1, rG0, rG1) /* swap it back into rI0-rI3    */
550         ENDIAN_SWAP(rI2, rI3, rG2, rG3)
551         cmpwi           rLN,0           /* any bytes left?              */
552         NEXT_BLOCK
553         bt              gt,ppc_encrypt_xts_loop
554         START_IV                        /* rIP back to start of the IV  */
555         SAVE_IV(rI0, 0)                 /* new IV = tweak for next call */
556         SAVE_IV(rI1, 4)
557         SAVE_IV(rI2, 8)
558         SAVE_IV(rI3, 12)
559         FINALIZE_CRYPT(8)               /* restore regs, drop the frame */
560         blr
561
562 /*
563  * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
564  *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
565  *
566  * Called from the glue layer to decrypt multiple blocks via XTS.
567  * If key_twk is given, the initial IV encryption will be
568  * processed too. Round values are AES128 = 4, AES192 = 5,
569  * AES256 = 6.
570  * The tweak lives in rI0-rI3; a byte swapped copy in rG0-rG3 feeds GF128_MUL. The final tweak is written back as IV.
571  */
572 _GLOBAL(ppc_decrypt_xts)
573         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
574         LOAD_IV(rI0, 0)                 /* rI0-rI3 = IV                 */
575         addi            rT1,rT0,4096    /* rT1 = table pointer + 4096   */
576         LOAD_IV(rI1, 4)
577         LOAD_IV(rI2, 8)
578         cmpwi           rKT,0           /* rKT == 0: skip IV encryption */
579         LOAD_IV(rI3, 12)
580         bt              eq,ppc_decrypt_xts_notweak
581         subi            rT0,rT0,4096    /* IV is encrypted: presumably the enc table sits 4096 below - confirm */
582         mr              rKP,rKT         /* encrypt the IV using rKT     */
583         START_KEY(rI0, rI1, rI2, rI3)
584         bl              ppc_encrypt_block       /* routine not shown here */
585         xor             rI0,rD0,rW0     /* rI0-rI3 = encrypted IV       */
586         xor             rI1,rD1,rW1
587         xor             rI2,rD2,rW2
588         xor             rI3,rD3,rW3
589         addi            rT0,rT0,4096    /* rT0 = PPC_AES_4K_DECTAB again */
590 ppc_decrypt_xts_notweak:
591         ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG0-rG3 = byte swapped tweak */
592         ENDIAN_SWAP(rG2, rG3, rI2, rI3)
593 ppc_decrypt_xts_loop:
594         LOAD_DATA(rD0, 0)               /* rD0-rD3 = next input block   */
595         mr              rKP,rKS         /* restart from saved key ptr   */
596         LOAD_DATA(rD1, 4)
597         subi            rLN,rLN,16      /* one block less to go         */
598         LOAD_DATA(rD2, 8)
599         LOAD_DATA(rD3, 12)
600         xor             rD0,rD0,rI0     /* XOR tweak into the input     */
601         xor             rD1,rD1,rI1
602         xor             rD2,rD2,rI2
603         xor             rD3,rD3,rI3
604         START_KEY(rD0, rD1, rD2, rD3)   /* XOR with key words at rKP    */
605         bl              ppc_decrypt_block       /* routine not shown here */
606         xor             rD0,rD0,rW0     /* XOR result with rW0-rW3      */
607         xor             rD1,rD1,rW1
608         xor             rD2,rD2,rW2
609         xor             rD3,rD3,rW3
610         xor             rD0,rD0,rI0     /* XOR tweak into the output    */
611         SAVE_DATA(rD0, 0)
612         xor             rD1,rD1,rI1
613         SAVE_DATA(rD1, 4)
614         xor             rD2,rD2,rI2
615         SAVE_DATA(rD2, 8)
616         xor             rD3,rD3,rI3
617         SAVE_DATA(rD3, 12)
618         GF128_MUL(rG0, rG1, rG2, rG3, rW0)      /* next tweak, rW0 is scratch */
619         ENDIAN_SWAP(rI0, rI1, rG0, rG1) /* swap it back into rI0-rI3    */
620         ENDIAN_SWAP(rI2, rI3, rG2, rG3)
621         cmpwi           rLN,0           /* any bytes left?              */
622         NEXT_BLOCK
623         bt              gt,ppc_decrypt_xts_loop
624         START_IV                        /* rIP back to start of the IV  */
625         SAVE_IV(rI0, 0)                 /* new IV = tweak for next call */
626         SAVE_IV(rI1, 4)
627         SAVE_IV(rI2, 8)
628         SAVE_IV(rI3, 12)
629         FINALIZE_CRYPT(8)               /* restore regs, drop the frame */
630         blr