Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / drivers / crypto / vmx / aesp8-ppc.pl
1 #!/usr/bin/env perl
2 #
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9 #
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are in average lower
22 # than additional overhead in pure AltiVec approach.
23
24 $flavour = shift;
25
26 if ($flavour =~ /64/) {
27         $SIZE_T =8;
28         $LRSAVE =2*$SIZE_T;
29         $STU    ="stdu";
30         $POP    ="ld";
31         $PUSH   ="std";
32         $UCMP   ="cmpld";
33         $SHL    ="sldi";
34 } elsif ($flavour =~ /32/) {
35         $SIZE_T =4;
36         $LRSAVE =$SIZE_T;
37         $STU    ="stwu";
38         $POP    ="lwz";
39         $PUSH   ="stw";
40         $UCMP   ="cmplw";
41         $SHL    ="slwi";
42 } else { die "nonsense $flavour"; }
43
44 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
45
46 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49 die "can't locate ppc-xlate.pl";
50
51 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
52
53 $FRAME=8*$SIZE_T;
54 $prefix="aes_p8";
55
56 $sp="r1";
57 $vrsave="r12";
58
59 #########################################################################
60 {{{     # Key setup procedures                                          #
61 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
62 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
63 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
64
65 $code.=<<___;
66 .machine        "any"
67
68 .text
69
70 .align  7
71 rcon:
72 .long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
73 .long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
74 .long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
75 .long   0,0,0,0                                         ?asis
76 Lconsts:
77         mflr    r0
78         bcl     20,31,\$+4
79         mflr    $ptr     #vvvvv "distance between . and rcon
80         addi    $ptr,$ptr,-0x48
81         mtlr    r0
82         blr
83         .long   0
84         .byte   0,12,0x14,0,0,0,0,0
85 .asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
86
87 .globl  .${prefix}_set_encrypt_key
88 Lset_encrypt_key:
89         mflr            r11
90         $PUSH           r11,$LRSAVE($sp)
91
92         li              $ptr,-1
93         ${UCMP}i        $inp,0
94         beq-            Lenc_key_abort          # if ($inp==0) return -1;
95         ${UCMP}i        $out,0
96         beq-            Lenc_key_abort          # if ($out==0) return -1;
97         li              $ptr,-2
98         cmpwi           $bits,128
99         blt-            Lenc_key_abort
100         cmpwi           $bits,256
101         bgt-            Lenc_key_abort
102         andi.           r0,$bits,0x3f
103         bne-            Lenc_key_abort
104
105         lis             r0,0xfff0
106         mfspr           $vrsave,256
107         mtspr           256,r0
108
109         bl              Lconsts
110         mtlr            r11
111
112         neg             r9,$inp
113         lvx             $in0,0,$inp
114         addi            $inp,$inp,15            # 15 is not typo
115         lvsr            $key,0,r9               # borrow $key
116         li              r8,0x20
117         cmpwi           $bits,192
118         lvx             $in1,0,$inp
119         le?vspltisb     $mask,0x0f              # borrow $mask
120         lvx             $rcon,0,$ptr
121         le?vxor         $key,$key,$mask         # adjust for byte swap
122         lvx             $mask,r8,$ptr
123         addi            $ptr,$ptr,0x10
124         vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
125         li              $cnt,8
126         vxor            $zero,$zero,$zero
127         mtctr           $cnt
128
129         ?lvsr           $outperm,0,$out
130         vspltisb        $outmask,-1
131         lvx             $outhead,0,$out
132         ?vperm          $outmask,$zero,$outmask,$outperm
133
134         blt             Loop128
135         addi            $inp,$inp,8
136         beq             L192
137         addi            $inp,$inp,8
138         b               L256
139
140 .align  4
141 Loop128:
142         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
143         vsldoi          $tmp,$zero,$in0,12      # >>32
144          vperm          $outtail,$in0,$in0,$outperm     # rotate
145          vsel           $stage,$outhead,$outtail,$outmask
146          vmr            $outhead,$outtail
147         vcipherlast     $key,$key,$rcon
148          stvx           $stage,0,$out
149          addi           $out,$out,16
150
151         vxor            $in0,$in0,$tmp
152         vsldoi          $tmp,$zero,$tmp,12      # >>32
153         vxor            $in0,$in0,$tmp
154         vsldoi          $tmp,$zero,$tmp,12      # >>32
155         vxor            $in0,$in0,$tmp
156          vadduwm        $rcon,$rcon,$rcon
157         vxor            $in0,$in0,$key
158         bdnz            Loop128
159
160         lvx             $rcon,0,$ptr            # last two round keys
161
162         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
163         vsldoi          $tmp,$zero,$in0,12      # >>32
164          vperm          $outtail,$in0,$in0,$outperm     # rotate
165          vsel           $stage,$outhead,$outtail,$outmask
166          vmr            $outhead,$outtail
167         vcipherlast     $key,$key,$rcon
168          stvx           $stage,0,$out
169          addi           $out,$out,16
170
171         vxor            $in0,$in0,$tmp
172         vsldoi          $tmp,$zero,$tmp,12      # >>32
173         vxor            $in0,$in0,$tmp
174         vsldoi          $tmp,$zero,$tmp,12      # >>32
175         vxor            $in0,$in0,$tmp
176          vadduwm        $rcon,$rcon,$rcon
177         vxor            $in0,$in0,$key
178
179         vperm           $key,$in0,$in0,$mask    # rotate-n-splat
180         vsldoi          $tmp,$zero,$in0,12      # >>32
181          vperm          $outtail,$in0,$in0,$outperm     # rotate
182          vsel           $stage,$outhead,$outtail,$outmask
183          vmr            $outhead,$outtail
184         vcipherlast     $key,$key,$rcon
185          stvx           $stage,0,$out
186          addi           $out,$out,16
187
188         vxor            $in0,$in0,$tmp
189         vsldoi          $tmp,$zero,$tmp,12      # >>32
190         vxor            $in0,$in0,$tmp
191         vsldoi          $tmp,$zero,$tmp,12      # >>32
192         vxor            $in0,$in0,$tmp
193         vxor            $in0,$in0,$key
194          vperm          $outtail,$in0,$in0,$outperm     # rotate
195          vsel           $stage,$outhead,$outtail,$outmask
196          vmr            $outhead,$outtail
197          stvx           $stage,0,$out
198
199         addi            $inp,$out,15            # 15 is not typo
200         addi            $out,$out,0x50
201
202         li              $rounds,10
203         b               Ldone
204
205 .align  4
206 L192:
207         lvx             $tmp,0,$inp
208         li              $cnt,4
209          vperm          $outtail,$in0,$in0,$outperm     # rotate
210          vsel           $stage,$outhead,$outtail,$outmask
211          vmr            $outhead,$outtail
212          stvx           $stage,0,$out
213          addi           $out,$out,16
214         vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
215         vspltisb        $key,8                  # borrow $key
216         mtctr           $cnt
217         vsububm         $mask,$mask,$key        # adjust the mask
218
219 Loop192:
220         vperm           $key,$in1,$in1,$mask    # roate-n-splat
221         vsldoi          $tmp,$zero,$in0,12      # >>32
222         vcipherlast     $key,$key,$rcon
223
224         vxor            $in0,$in0,$tmp
225         vsldoi          $tmp,$zero,$tmp,12      # >>32
226         vxor            $in0,$in0,$tmp
227         vsldoi          $tmp,$zero,$tmp,12      # >>32
228         vxor            $in0,$in0,$tmp
229
230          vsldoi         $stage,$zero,$in1,8
231         vspltw          $tmp,$in0,3
232         vxor            $tmp,$tmp,$in1
233         vsldoi          $in1,$zero,$in1,12      # >>32
234          vadduwm        $rcon,$rcon,$rcon
235         vxor            $in1,$in1,$tmp
236         vxor            $in0,$in0,$key
237         vxor            $in1,$in1,$key
238          vsldoi         $stage,$stage,$in0,8
239
240         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
241         vsldoi          $tmp,$zero,$in0,12      # >>32
242          vperm          $outtail,$stage,$stage,$outperm # rotate
243          vsel           $stage,$outhead,$outtail,$outmask
244          vmr            $outhead,$outtail
245         vcipherlast     $key,$key,$rcon
246          stvx           $stage,0,$out
247          addi           $out,$out,16
248
249          vsldoi         $stage,$in0,$in1,8
250         vxor            $in0,$in0,$tmp
251         vsldoi          $tmp,$zero,$tmp,12      # >>32
252          vperm          $outtail,$stage,$stage,$outperm # rotate
253          vsel           $stage,$outhead,$outtail,$outmask
254          vmr            $outhead,$outtail
255         vxor            $in0,$in0,$tmp
256         vsldoi          $tmp,$zero,$tmp,12      # >>32
257         vxor            $in0,$in0,$tmp
258          stvx           $stage,0,$out
259          addi           $out,$out,16
260
261         vspltw          $tmp,$in0,3
262         vxor            $tmp,$tmp,$in1
263         vsldoi          $in1,$zero,$in1,12      # >>32
264          vadduwm        $rcon,$rcon,$rcon
265         vxor            $in1,$in1,$tmp
266         vxor            $in0,$in0,$key
267         vxor            $in1,$in1,$key
268          vperm          $outtail,$in0,$in0,$outperm     # rotate
269          vsel           $stage,$outhead,$outtail,$outmask
270          vmr            $outhead,$outtail
271          stvx           $stage,0,$out
272          addi           $inp,$out,15            # 15 is not typo
273          addi           $out,$out,16
274         bdnz            Loop192
275
276         li              $rounds,12
277         addi            $out,$out,0x20
278         b               Ldone
279
280 .align  4
281 L256:
282         lvx             $tmp,0,$inp
283         li              $cnt,7
284         li              $rounds,14
285          vperm          $outtail,$in0,$in0,$outperm     # rotate
286          vsel           $stage,$outhead,$outtail,$outmask
287          vmr            $outhead,$outtail
288          stvx           $stage,0,$out
289          addi           $out,$out,16
290         vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
291         mtctr           $cnt
292
293 Loop256:
294         vperm           $key,$in1,$in1,$mask    # rotate-n-splat
295         vsldoi          $tmp,$zero,$in0,12      # >>32
296          vperm          $outtail,$in1,$in1,$outperm     # rotate
297          vsel           $stage,$outhead,$outtail,$outmask
298          vmr            $outhead,$outtail
299         vcipherlast     $key,$key,$rcon
300          stvx           $stage,0,$out
301          addi           $out,$out,16
302
303         vxor            $in0,$in0,$tmp
304         vsldoi          $tmp,$zero,$tmp,12      # >>32
305         vxor            $in0,$in0,$tmp
306         vsldoi          $tmp,$zero,$tmp,12      # >>32
307         vxor            $in0,$in0,$tmp
308          vadduwm        $rcon,$rcon,$rcon
309         vxor            $in0,$in0,$key
310          vperm          $outtail,$in0,$in0,$outperm     # rotate
311          vsel           $stage,$outhead,$outtail,$outmask
312          vmr            $outhead,$outtail
313          stvx           $stage,0,$out
314          addi           $inp,$out,15            # 15 is not typo
315          addi           $out,$out,16
316         bdz             Ldone
317
318         vspltw          $key,$in0,3             # just splat
319         vsldoi          $tmp,$zero,$in1,12      # >>32
320         vsbox           $key,$key
321
322         vxor            $in1,$in1,$tmp
323         vsldoi          $tmp,$zero,$tmp,12      # >>32
324         vxor            $in1,$in1,$tmp
325         vsldoi          $tmp,$zero,$tmp,12      # >>32
326         vxor            $in1,$in1,$tmp
327
328         vxor            $in1,$in1,$key
329         b               Loop256
330
331 .align  4
332 Ldone:
333         lvx             $in1,0,$inp             # redundant in aligned case
334         vsel            $in1,$outhead,$in1,$outmask
335         stvx            $in1,0,$inp
336         li              $ptr,0
337         mtspr           256,$vrsave
338         stw             $rounds,0($out)
339
340 Lenc_key_abort:
341         mr              r3,$ptr
342         blr
343         .long           0
344         .byte           0,12,0x14,1,0,0,3,0
345         .long           0
346 .size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
347
348 .globl  .${prefix}_set_decrypt_key
349         $STU            $sp,-$FRAME($sp)
350         mflr            r10
351         $PUSH           r10,$FRAME+$LRSAVE($sp)
352         bl              Lset_encrypt_key
353         mtlr            r10
354
355         cmpwi           r3,0
356         bne-            Ldec_key_abort
357
358         slwi            $cnt,$rounds,4
359         subi            $inp,$out,240           # first round key
360         srwi            $rounds,$rounds,1
361         add             $out,$inp,$cnt          # last round key
362         mtctr           $rounds
363
364 Ldeckey:
365         lwz             r0, 0($inp)
366         lwz             r6, 4($inp)
367         lwz             r7, 8($inp)
368         lwz             r8, 12($inp)
369         addi            $inp,$inp,16
370         lwz             r9, 0($out)
371         lwz             r10,4($out)
372         lwz             r11,8($out)
373         lwz             r12,12($out)
374         stw             r0, 0($out)
375         stw             r6, 4($out)
376         stw             r7, 8($out)
377         stw             r8, 12($out)
378         subi            $out,$out,16
379         stw             r9, -16($inp)
380         stw             r10,-12($inp)
381         stw             r11,-8($inp)
382         stw             r12,-4($inp)
383         bdnz            Ldeckey
384
385         xor             r3,r3,r3                # return value
386 Ldec_key_abort:
387         addi            $sp,$sp,$FRAME
388         blr
389         .long           0
390         .byte           0,12,4,1,0x80,0,3,0
391         .long           0
392 .size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
393 ___
394 }}}
395 #########################################################################
396 {{{     # Single block en- and decrypt procedures                       #
397 sub gen_block () {
398 my $dir = shift;
399 my $n   = $dir eq "de" ? "n" : "";
400 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
401
402 $code.=<<___;
403 .globl  .${prefix}_${dir}crypt
404         lwz             $rounds,240($key)
405         lis             r0,0xfc00
406         mfspr           $vrsave,256
407         li              $idx,15                 # 15 is not typo
408         mtspr           256,r0
409
410         lvx             v0,0,$inp
411         neg             r11,$out
412         lvx             v1,$idx,$inp
413         lvsl            v2,0,$inp               # inpperm
414         le?vspltisb     v4,0x0f
415         ?lvsl           v3,0,r11                # outperm
416         le?vxor         v2,v2,v4
417         li              $idx,16
418         vperm           v0,v0,v1,v2             # align [and byte swap in LE]
419         lvx             v1,0,$key
420         ?lvsl           v5,0,$key               # keyperm
421         srwi            $rounds,$rounds,1
422         lvx             v2,$idx,$key
423         addi            $idx,$idx,16
424         subi            $rounds,$rounds,1
425         ?vperm          v1,v1,v2,v5             # align round key
426
427         vxor            v0,v0,v1
428         lvx             v1,$idx,$key
429         addi            $idx,$idx,16
430         mtctr           $rounds
431
432 Loop_${dir}c:
433         ?vperm          v2,v2,v1,v5
434         v${n}cipher     v0,v0,v2
435         lvx             v2,$idx,$key
436         addi            $idx,$idx,16
437         ?vperm          v1,v1,v2,v5
438         v${n}cipher     v0,v0,v1
439         lvx             v1,$idx,$key
440         addi            $idx,$idx,16
441         bdnz            Loop_${dir}c
442
443         ?vperm          v2,v2,v1,v5
444         v${n}cipher     v0,v0,v2
445         lvx             v2,$idx,$key
446         ?vperm          v1,v1,v2,v5
447         v${n}cipherlast v0,v0,v1
448
449         vspltisb        v2,-1
450         vxor            v1,v1,v1
451         li              $idx,15                 # 15 is not typo
452         ?vperm          v2,v1,v2,v3             # outmask
453         le?vxor         v3,v3,v4
454         lvx             v1,0,$out               # outhead
455         vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
456         vsel            v1,v1,v0,v2
457         lvx             v4,$idx,$out
458         stvx            v1,0,$out
459         vsel            v0,v0,v4,v2
460         stvx            v0,$idx,$out
461
462         mtspr           256,$vrsave
463         blr
464         .long           0
465         .byte           0,12,0x14,0,0,0,3,0
466         .long           0
467 .size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
468 ___
469 }
470 &gen_block("en");
471 &gen_block("de");
472 }}}
473 #########################################################################
474 {{{     # CBC en- and decrypt procedures                                #
475 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
476 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
477 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
478                                                 map("v$_",(4..10));
479 $code.=<<___;
480 .globl  .${prefix}_cbc_encrypt
481         ${UCMP}i        $len,16
482         bltlr-
483
484         cmpwi           $enc,0                  # test direction
485         lis             r0,0xffe0
486         mfspr           $vrsave,256
487         mtspr           256,r0
488
489         li              $idx,15
490         vxor            $rndkey0,$rndkey0,$rndkey0
491         le?vspltisb     $tmp,0x0f
492
493         lvx             $ivec,0,$ivp            # load [unaligned] iv
494         lvsl            $inpperm,0,$ivp
495         lvx             $inptail,$idx,$ivp
496         le?vxor         $inpperm,$inpperm,$tmp
497         vperm           $ivec,$ivec,$inptail,$inpperm
498
499         neg             r11,$inp
500         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
501         lwz             $rounds,240($key)
502
503         lvsr            $inpperm,0,r11          # prepare for unaligned load
504         lvx             $inptail,0,$inp
505         addi            $inp,$inp,15            # 15 is not typo
506         le?vxor         $inpperm,$inpperm,$tmp
507
508         ?lvsr           $outperm,0,$out         # prepare for unaligned store
509         vspltisb        $outmask,-1
510         lvx             $outhead,0,$out
511         ?vperm          $outmask,$rndkey0,$outmask,$outperm
512         le?vxor         $outperm,$outperm,$tmp
513
514         srwi            $rounds,$rounds,1
515         li              $idx,16
516         subi            $rounds,$rounds,1
517         beq             Lcbc_dec
518
519 Lcbc_enc:
520         vmr             $inout,$inptail
521         lvx             $inptail,0,$inp
522         addi            $inp,$inp,16
523         mtctr           $rounds
524         subi            $len,$len,16            # len-=16
525
526         lvx             $rndkey0,0,$key
527          vperm          $inout,$inout,$inptail,$inpperm
528         lvx             $rndkey1,$idx,$key
529         addi            $idx,$idx,16
530         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
531         vxor            $inout,$inout,$rndkey0
532         lvx             $rndkey0,$idx,$key
533         addi            $idx,$idx,16
534         vxor            $inout,$inout,$ivec
535
536 Loop_cbc_enc:
537         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
538         vcipher         $inout,$inout,$rndkey1
539         lvx             $rndkey1,$idx,$key
540         addi            $idx,$idx,16
541         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
542         vcipher         $inout,$inout,$rndkey0
543         lvx             $rndkey0,$idx,$key
544         addi            $idx,$idx,16
545         bdnz            Loop_cbc_enc
546
547         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
548         vcipher         $inout,$inout,$rndkey1
549         lvx             $rndkey1,$idx,$key
550         li              $idx,16
551         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
552         vcipherlast     $ivec,$inout,$rndkey0
553         ${UCMP}i        $len,16
554
555         vperm           $tmp,$ivec,$ivec,$outperm
556         vsel            $inout,$outhead,$tmp,$outmask
557         vmr             $outhead,$tmp
558         stvx            $inout,0,$out
559         addi            $out,$out,16
560         bge             Lcbc_enc
561
562         b               Lcbc_done
563
564 .align  4
565 Lcbc_dec:
566         ${UCMP}i        $len,128
567         bge             _aesp8_cbc_decrypt8x
568         vmr             $tmp,$inptail
569         lvx             $inptail,0,$inp
570         addi            $inp,$inp,16
571         mtctr           $rounds
572         subi            $len,$len,16            # len-=16
573
574         lvx             $rndkey0,0,$key
575          vperm          $tmp,$tmp,$inptail,$inpperm
576         lvx             $rndkey1,$idx,$key
577         addi            $idx,$idx,16
578         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
579         vxor            $inout,$tmp,$rndkey0
580         lvx             $rndkey0,$idx,$key
581         addi            $idx,$idx,16
582
583 Loop_cbc_dec:
584         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
585         vncipher        $inout,$inout,$rndkey1
586         lvx             $rndkey1,$idx,$key
587         addi            $idx,$idx,16
588         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
589         vncipher        $inout,$inout,$rndkey0
590         lvx             $rndkey0,$idx,$key
591         addi            $idx,$idx,16
592         bdnz            Loop_cbc_dec
593
594         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
595         vncipher        $inout,$inout,$rndkey1
596         lvx             $rndkey1,$idx,$key
597         li              $idx,16
598         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
599         vncipherlast    $inout,$inout,$rndkey0
600         ${UCMP}i        $len,16
601
602         vxor            $inout,$inout,$ivec
603         vmr             $ivec,$tmp
604         vperm           $tmp,$inout,$inout,$outperm
605         vsel            $inout,$outhead,$tmp,$outmask
606         vmr             $outhead,$tmp
607         stvx            $inout,0,$out
608         addi            $out,$out,16
609         bge             Lcbc_dec
610
611 Lcbc_done:
612         addi            $out,$out,-1
613         lvx             $inout,0,$out           # redundant in aligned case
614         vsel            $inout,$outhead,$inout,$outmask
615         stvx            $inout,0,$out
616
617         neg             $enc,$ivp               # write [unaligned] iv
618         li              $idx,15                 # 15 is not typo
619         vxor            $rndkey0,$rndkey0,$rndkey0
620         vspltisb        $outmask,-1
621         le?vspltisb     $tmp,0x0f
622         ?lvsl           $outperm,0,$enc
623         ?vperm          $outmask,$rndkey0,$outmask,$outperm
624         le?vxor         $outperm,$outperm,$tmp
625         lvx             $outhead,0,$ivp
626         vperm           $ivec,$ivec,$ivec,$outperm
627         vsel            $inout,$outhead,$ivec,$outmask
628         lvx             $inptail,$idx,$ivp
629         stvx            $inout,0,$ivp
630         vsel            $inout,$ivec,$inptail,$outmask
631         stvx            $inout,$idx,$ivp
632
633         mtspr           256,$vrsave
634         blr
635         .long           0
636         .byte           0,12,0x14,0,0,0,6,0
637         .long           0
638 ___
639 #########################################################################
640 {{      # Optimized CBC decrypt procedure                               #
641 my $key_="r11";
642 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
643 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
644 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
645 my $rndkey0="v23";      # v24-v25 rotating buffer for first found keys
646                         # v26-v31 last 6 round keys
647 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
648
649 $code.=<<___;
650 .align  5
651 _aesp8_cbc_decrypt8x:
652         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
653         li              r10,`$FRAME+8*16+15`
654         li              r11,`$FRAME+8*16+31`
655         stvx            v20,r10,$sp             # ABI says so
656         addi            r10,r10,32
657         stvx            v21,r11,$sp
658         addi            r11,r11,32
659         stvx            v22,r10,$sp
660         addi            r10,r10,32
661         stvx            v23,r11,$sp
662         addi            r11,r11,32
663         stvx            v24,r10,$sp
664         addi            r10,r10,32
665         stvx            v25,r11,$sp
666         addi            r11,r11,32
667         stvx            v26,r10,$sp
668         addi            r10,r10,32
669         stvx            v27,r11,$sp
670         addi            r11,r11,32
671         stvx            v28,r10,$sp
672         addi            r10,r10,32
673         stvx            v29,r11,$sp
674         addi            r11,r11,32
675         stvx            v30,r10,$sp
676         stvx            v31,r11,$sp
677         li              r0,-1
678         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
679         li              $x10,0x10
680         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
681         li              $x20,0x20
682         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
683         li              $x30,0x30
684         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
685         li              $x40,0x40
686         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
687         li              $x50,0x50
688         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
689         li              $x60,0x60
690         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
691         li              $x70,0x70
692         mtspr           256,r0
693
694         subi            $rounds,$rounds,3       # -4 in total
695         subi            $len,$len,128           # bias
696
697         lvx             $rndkey0,$x00,$key      # load key schedule
698         lvx             v30,$x10,$key
699         addi            $key,$key,0x20
700         lvx             v31,$x00,$key
701         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
702         addi            $key_,$sp,$FRAME+15
703         mtctr           $rounds
704
705 Load_cbc_dec_key:
706         ?vperm          v24,v30,v31,$keyperm
707         lvx             v30,$x10,$key
708         addi            $key,$key,0x20
709         stvx            v24,$x00,$key_          # off-load round[1]
710         ?vperm          v25,v31,v30,$keyperm
711         lvx             v31,$x00,$key
712         stvx            v25,$x10,$key_          # off-load round[2]
713         addi            $key_,$key_,0x20
714         bdnz            Load_cbc_dec_key
715
716         lvx             v26,$x10,$key
717         ?vperm          v24,v30,v31,$keyperm
718         lvx             v27,$x20,$key
719         stvx            v24,$x00,$key_          # off-load round[3]
720         ?vperm          v25,v31,v26,$keyperm
721         lvx             v28,$x30,$key
722         stvx            v25,$x10,$key_          # off-load round[4]
723         addi            $key_,$sp,$FRAME+15     # rewind $key_
724         ?vperm          v26,v26,v27,$keyperm
725         lvx             v29,$x40,$key
726         ?vperm          v27,v27,v28,$keyperm
727         lvx             v30,$x50,$key
728         ?vperm          v28,v28,v29,$keyperm
729         lvx             v31,$x60,$key
730         ?vperm          v29,v29,v30,$keyperm
731         lvx             $out0,$x70,$key         # borrow $out0
732         ?vperm          v30,v30,v31,$keyperm
733         lvx             v24,$x00,$key_          # pre-load round[1]
734         ?vperm          v31,v31,$out0,$keyperm
735         lvx             v25,$x10,$key_          # pre-load round[2]
736
737         #lvx            $inptail,0,$inp         # "caller" already did this
738         #addi           $inp,$inp,15            # 15 is not typo
739         subi            $inp,$inp,15            # undo "caller"
740
741          le?li          $idx,8
742         lvx_u           $in0,$x00,$inp          # load first 8 "words"
743          le?lvsl        $inpperm,0,$idx
744          le?vspltisb    $tmp,0x0f
745         lvx_u           $in1,$x10,$inp
746          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
747         lvx_u           $in2,$x20,$inp
748          le?vperm       $in0,$in0,$in0,$inpperm
749         lvx_u           $in3,$x30,$inp
750          le?vperm       $in1,$in1,$in1,$inpperm
751         lvx_u           $in4,$x40,$inp
752          le?vperm       $in2,$in2,$in2,$inpperm
753         vxor            $out0,$in0,$rndkey0
754         lvx_u           $in5,$x50,$inp
755          le?vperm       $in3,$in3,$in3,$inpperm
756         vxor            $out1,$in1,$rndkey0
757         lvx_u           $in6,$x60,$inp
758          le?vperm       $in4,$in4,$in4,$inpperm
759         vxor            $out2,$in2,$rndkey0
760         lvx_u           $in7,$x70,$inp
761         addi            $inp,$inp,0x80
762          le?vperm       $in5,$in5,$in5,$inpperm
763         vxor            $out3,$in3,$rndkey0
764          le?vperm       $in6,$in6,$in6,$inpperm
765         vxor            $out4,$in4,$rndkey0
766          le?vperm       $in7,$in7,$in7,$inpperm
767         vxor            $out5,$in5,$rndkey0
768         vxor            $out6,$in6,$rndkey0
769         vxor            $out7,$in7,$rndkey0
770
771         mtctr           $rounds
772         b               Loop_cbc_dec8x
773 .align  5
774 Loop_cbc_dec8x:
775         vncipher        $out0,$out0,v24
776         vncipher        $out1,$out1,v24
777         vncipher        $out2,$out2,v24
778         vncipher        $out3,$out3,v24
779         vncipher        $out4,$out4,v24
780         vncipher        $out5,$out5,v24
781         vncipher        $out6,$out6,v24
782         vncipher        $out7,$out7,v24
783         lvx             v24,$x20,$key_          # round[3]
784         addi            $key_,$key_,0x20
785
786         vncipher        $out0,$out0,v25
787         vncipher        $out1,$out1,v25
788         vncipher        $out2,$out2,v25
789         vncipher        $out3,$out3,v25
790         vncipher        $out4,$out4,v25
791         vncipher        $out5,$out5,v25
792         vncipher        $out6,$out6,v25
793         vncipher        $out7,$out7,v25
794         lvx             v25,$x10,$key_          # round[4]
795         bdnz            Loop_cbc_dec8x
796
797         subic           $len,$len,128           # $len-=128
798         vncipher        $out0,$out0,v24
799         vncipher        $out1,$out1,v24
800         vncipher        $out2,$out2,v24
801         vncipher        $out3,$out3,v24
802         vncipher        $out4,$out4,v24
803         vncipher        $out5,$out5,v24
804         vncipher        $out6,$out6,v24
805         vncipher        $out7,$out7,v24
806
807         subfe.          r0,r0,r0                # borrow?-1:0
808         vncipher        $out0,$out0,v25
809         vncipher        $out1,$out1,v25
810         vncipher        $out2,$out2,v25
811         vncipher        $out3,$out3,v25
812         vncipher        $out4,$out4,v25
813         vncipher        $out5,$out5,v25
814         vncipher        $out6,$out6,v25
815         vncipher        $out7,$out7,v25
816
817         and             r0,r0,$len
818         vncipher        $out0,$out0,v26
819         vncipher        $out1,$out1,v26
820         vncipher        $out2,$out2,v26
821         vncipher        $out3,$out3,v26
822         vncipher        $out4,$out4,v26
823         vncipher        $out5,$out5,v26
824         vncipher        $out6,$out6,v26
825         vncipher        $out7,$out7,v26
826
827         add             $inp,$inp,r0            # $inp is adjusted in such
828                                                 # way that at exit from the
829                                                 # loop inX-in7 are loaded
830                                                 # with last "words"
831         vncipher        $out0,$out0,v27
832         vncipher        $out1,$out1,v27
833         vncipher        $out2,$out2,v27
834         vncipher        $out3,$out3,v27
835         vncipher        $out4,$out4,v27
836         vncipher        $out5,$out5,v27
837         vncipher        $out6,$out6,v27
838         vncipher        $out7,$out7,v27
839
840         addi            $key_,$sp,$FRAME+15     # rewind $key_
841         vncipher        $out0,$out0,v28
842         vncipher        $out1,$out1,v28
843         vncipher        $out2,$out2,v28
844         vncipher        $out3,$out3,v28
845         vncipher        $out4,$out4,v28
846         vncipher        $out5,$out5,v28
847         vncipher        $out6,$out6,v28
848         vncipher        $out7,$out7,v28
849         lvx             v24,$x00,$key_          # re-pre-load round[1]
850
851         vncipher        $out0,$out0,v29
852         vncipher        $out1,$out1,v29
853         vncipher        $out2,$out2,v29
854         vncipher        $out3,$out3,v29
855         vncipher        $out4,$out4,v29
856         vncipher        $out5,$out5,v29
857         vncipher        $out6,$out6,v29
858         vncipher        $out7,$out7,v29
859         lvx             v25,$x10,$key_          # re-pre-load round[2]
860
861         vncipher        $out0,$out0,v30
862          vxor           $ivec,$ivec,v31         # xor with last round key
863         vncipher        $out1,$out1,v30
864          vxor           $in0,$in0,v31
865         vncipher        $out2,$out2,v30
866          vxor           $in1,$in1,v31
867         vncipher        $out3,$out3,v30
868          vxor           $in2,$in2,v31
869         vncipher        $out4,$out4,v30
870          vxor           $in3,$in3,v31
871         vncipher        $out5,$out5,v30
872          vxor           $in4,$in4,v31
873         vncipher        $out6,$out6,v30
874          vxor           $in5,$in5,v31
875         vncipher        $out7,$out7,v30
876          vxor           $in6,$in6,v31
877
878         vncipherlast    $out0,$out0,$ivec
879         vncipherlast    $out1,$out1,$in0
880          lvx_u          $in0,$x00,$inp          # load next input block
881         vncipherlast    $out2,$out2,$in1
882          lvx_u          $in1,$x10,$inp
883         vncipherlast    $out3,$out3,$in2
884          le?vperm       $in0,$in0,$in0,$inpperm
885          lvx_u          $in2,$x20,$inp
886         vncipherlast    $out4,$out4,$in3
887          le?vperm       $in1,$in1,$in1,$inpperm
888          lvx_u          $in3,$x30,$inp
889         vncipherlast    $out5,$out5,$in4
890          le?vperm       $in2,$in2,$in2,$inpperm
891          lvx_u          $in4,$x40,$inp
892         vncipherlast    $out6,$out6,$in5
893          le?vperm       $in3,$in3,$in3,$inpperm
894          lvx_u          $in5,$x50,$inp
895         vncipherlast    $out7,$out7,$in6
896          le?vperm       $in4,$in4,$in4,$inpperm
897          lvx_u          $in6,$x60,$inp
898         vmr             $ivec,$in7
899          le?vperm       $in5,$in5,$in5,$inpperm
900          lvx_u          $in7,$x70,$inp
901          addi           $inp,$inp,0x80
902
903         le?vperm        $out0,$out0,$out0,$inpperm
904         le?vperm        $out1,$out1,$out1,$inpperm
905         stvx_u          $out0,$x00,$out
906          le?vperm       $in6,$in6,$in6,$inpperm
907          vxor           $out0,$in0,$rndkey0
908         le?vperm        $out2,$out2,$out2,$inpperm
909         stvx_u          $out1,$x10,$out
910          le?vperm       $in7,$in7,$in7,$inpperm
911          vxor           $out1,$in1,$rndkey0
912         le?vperm        $out3,$out3,$out3,$inpperm
913         stvx_u          $out2,$x20,$out
914          vxor           $out2,$in2,$rndkey0
915         le?vperm        $out4,$out4,$out4,$inpperm
916         stvx_u          $out3,$x30,$out
917          vxor           $out3,$in3,$rndkey0
918         le?vperm        $out5,$out5,$out5,$inpperm
919         stvx_u          $out4,$x40,$out
920          vxor           $out4,$in4,$rndkey0
921         le?vperm        $out6,$out6,$out6,$inpperm
922         stvx_u          $out5,$x50,$out
923          vxor           $out5,$in5,$rndkey0
924         le?vperm        $out7,$out7,$out7,$inpperm
925         stvx_u          $out6,$x60,$out
926          vxor           $out6,$in6,$rndkey0
927         stvx_u          $out7,$x70,$out
928         addi            $out,$out,0x80
929          vxor           $out7,$in7,$rndkey0
930
931         mtctr           $rounds
932         beq             Loop_cbc_dec8x          # did $len-=128 borrow?
933
934         addic.          $len,$len,128
935         beq             Lcbc_dec8x_done
936         nop
937         nop
938
939 Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
940         vncipher        $out1,$out1,v24
941         vncipher        $out2,$out2,v24
942         vncipher        $out3,$out3,v24
943         vncipher        $out4,$out4,v24
944         vncipher        $out5,$out5,v24
945         vncipher        $out6,$out6,v24
946         vncipher        $out7,$out7,v24
947         lvx             v24,$x20,$key_          # round[3]
948         addi            $key_,$key_,0x20
949
950         vncipher        $out1,$out1,v25
951         vncipher        $out2,$out2,v25
952         vncipher        $out3,$out3,v25
953         vncipher        $out4,$out4,v25
954         vncipher        $out5,$out5,v25
955         vncipher        $out6,$out6,v25
956         vncipher        $out7,$out7,v25
957         lvx             v25,$x10,$key_          # round[4]
958         bdnz            Loop_cbc_dec8x_tail
959
960         vncipher        $out1,$out1,v24
961         vncipher        $out2,$out2,v24
962         vncipher        $out3,$out3,v24
963         vncipher        $out4,$out4,v24
964         vncipher        $out5,$out5,v24
965         vncipher        $out6,$out6,v24
966         vncipher        $out7,$out7,v24
967
968         vncipher        $out1,$out1,v25
969         vncipher        $out2,$out2,v25
970         vncipher        $out3,$out3,v25
971         vncipher        $out4,$out4,v25
972         vncipher        $out5,$out5,v25
973         vncipher        $out6,$out6,v25
974         vncipher        $out7,$out7,v25
975
976         vncipher        $out1,$out1,v26
977         vncipher        $out2,$out2,v26
978         vncipher        $out3,$out3,v26
979         vncipher        $out4,$out4,v26
980         vncipher        $out5,$out5,v26
981         vncipher        $out6,$out6,v26
982         vncipher        $out7,$out7,v26
983
984         vncipher        $out1,$out1,v27
985         vncipher        $out2,$out2,v27
986         vncipher        $out3,$out3,v27
987         vncipher        $out4,$out4,v27
988         vncipher        $out5,$out5,v27
989         vncipher        $out6,$out6,v27
990         vncipher        $out7,$out7,v27
991
992         vncipher        $out1,$out1,v28
993         vncipher        $out2,$out2,v28
994         vncipher        $out3,$out3,v28
995         vncipher        $out4,$out4,v28
996         vncipher        $out5,$out5,v28
997         vncipher        $out6,$out6,v28
998         vncipher        $out7,$out7,v28
999
1000         vncipher        $out1,$out1,v29
1001         vncipher        $out2,$out2,v29
1002         vncipher        $out3,$out3,v29
1003         vncipher        $out4,$out4,v29
1004         vncipher        $out5,$out5,v29
1005         vncipher        $out6,$out6,v29
1006         vncipher        $out7,$out7,v29
1007
1008         vncipher        $out1,$out1,v30
1009          vxor           $ivec,$ivec,v31         # last round key
1010         vncipher        $out2,$out2,v30
1011          vxor           $in1,$in1,v31
1012         vncipher        $out3,$out3,v30
1013          vxor           $in2,$in2,v31
1014         vncipher        $out4,$out4,v30
1015          vxor           $in3,$in3,v31
1016         vncipher        $out5,$out5,v30
1017          vxor           $in4,$in4,v31
1018         vncipher        $out6,$out6,v30
1019          vxor           $in5,$in5,v31
1020         vncipher        $out7,$out7,v30
1021          vxor           $in6,$in6,v31
1022
1023         cmplwi          $len,32                 # switch($len)
1024         blt             Lcbc_dec8x_one
1025         nop
1026         beq             Lcbc_dec8x_two
1027         cmplwi          $len,64
1028         blt             Lcbc_dec8x_three
1029         nop
1030         beq             Lcbc_dec8x_four
1031         cmplwi          $len,96
1032         blt             Lcbc_dec8x_five
1033         nop
1034         beq             Lcbc_dec8x_six
1035
1036 Lcbc_dec8x_seven:
1037         vncipherlast    $out1,$out1,$ivec
1038         vncipherlast    $out2,$out2,$in1
1039         vncipherlast    $out3,$out3,$in2
1040         vncipherlast    $out4,$out4,$in3
1041         vncipherlast    $out5,$out5,$in4
1042         vncipherlast    $out6,$out6,$in5
1043         vncipherlast    $out7,$out7,$in6
1044         vmr             $ivec,$in7
1045
1046         le?vperm        $out1,$out1,$out1,$inpperm
1047         le?vperm        $out2,$out2,$out2,$inpperm
1048         stvx_u          $out1,$x00,$out
1049         le?vperm        $out3,$out3,$out3,$inpperm
1050         stvx_u          $out2,$x10,$out
1051         le?vperm        $out4,$out4,$out4,$inpperm
1052         stvx_u          $out3,$x20,$out
1053         le?vperm        $out5,$out5,$out5,$inpperm
1054         stvx_u          $out4,$x30,$out
1055         le?vperm        $out6,$out6,$out6,$inpperm
1056         stvx_u          $out5,$x40,$out
1057         le?vperm        $out7,$out7,$out7,$inpperm
1058         stvx_u          $out6,$x50,$out
1059         stvx_u          $out7,$x60,$out
1060         addi            $out,$out,0x70
1061         b               Lcbc_dec8x_done
1062
1063 .align  5
1064 Lcbc_dec8x_six:
1065         vncipherlast    $out2,$out2,$ivec
1066         vncipherlast    $out3,$out3,$in2
1067         vncipherlast    $out4,$out4,$in3
1068         vncipherlast    $out5,$out5,$in4
1069         vncipherlast    $out6,$out6,$in5
1070         vncipherlast    $out7,$out7,$in6
1071         vmr             $ivec,$in7
1072
1073         le?vperm        $out2,$out2,$out2,$inpperm
1074         le?vperm        $out3,$out3,$out3,$inpperm
1075         stvx_u          $out2,$x00,$out
1076         le?vperm        $out4,$out4,$out4,$inpperm
1077         stvx_u          $out3,$x10,$out
1078         le?vperm        $out5,$out5,$out5,$inpperm
1079         stvx_u          $out4,$x20,$out
1080         le?vperm        $out6,$out6,$out6,$inpperm
1081         stvx_u          $out5,$x30,$out
1082         le?vperm        $out7,$out7,$out7,$inpperm
1083         stvx_u          $out6,$x40,$out
1084         stvx_u          $out7,$x50,$out
1085         addi            $out,$out,0x60
1086         b               Lcbc_dec8x_done
1087
1088 .align  5
1089 Lcbc_dec8x_five:
1090         vncipherlast    $out3,$out3,$ivec
1091         vncipherlast    $out4,$out4,$in3
1092         vncipherlast    $out5,$out5,$in4
1093         vncipherlast    $out6,$out6,$in5
1094         vncipherlast    $out7,$out7,$in6
1095         vmr             $ivec,$in7
1096
1097         le?vperm        $out3,$out3,$out3,$inpperm
1098         le?vperm        $out4,$out4,$out4,$inpperm
1099         stvx_u          $out3,$x00,$out
1100         le?vperm        $out5,$out5,$out5,$inpperm
1101         stvx_u          $out4,$x10,$out
1102         le?vperm        $out6,$out6,$out6,$inpperm
1103         stvx_u          $out5,$x20,$out
1104         le?vperm        $out7,$out7,$out7,$inpperm
1105         stvx_u          $out6,$x30,$out
1106         stvx_u          $out7,$x40,$out
1107         addi            $out,$out,0x50
1108         b               Lcbc_dec8x_done
1109
1110 .align  5
1111 Lcbc_dec8x_four:
1112         vncipherlast    $out4,$out4,$ivec
1113         vncipherlast    $out5,$out5,$in4
1114         vncipherlast    $out6,$out6,$in5
1115         vncipherlast    $out7,$out7,$in6
1116         vmr             $ivec,$in7
1117
1118         le?vperm        $out4,$out4,$out4,$inpperm
1119         le?vperm        $out5,$out5,$out5,$inpperm
1120         stvx_u          $out4,$x00,$out
1121         le?vperm        $out6,$out6,$out6,$inpperm
1122         stvx_u          $out5,$x10,$out
1123         le?vperm        $out7,$out7,$out7,$inpperm
1124         stvx_u          $out6,$x20,$out
1125         stvx_u          $out7,$x30,$out
1126         addi            $out,$out,0x40
1127         b               Lcbc_dec8x_done
1128
1129 .align  5
1130 Lcbc_dec8x_three:
1131         vncipherlast    $out5,$out5,$ivec
1132         vncipherlast    $out6,$out6,$in5
1133         vncipherlast    $out7,$out7,$in6
1134         vmr             $ivec,$in7
1135
1136         le?vperm        $out5,$out5,$out5,$inpperm
1137         le?vperm        $out6,$out6,$out6,$inpperm
1138         stvx_u          $out5,$x00,$out
1139         le?vperm        $out7,$out7,$out7,$inpperm
1140         stvx_u          $out6,$x10,$out
1141         stvx_u          $out7,$x20,$out
1142         addi            $out,$out,0x30
1143         b               Lcbc_dec8x_done
1144
1145 .align  5
1146 Lcbc_dec8x_two:
1147         vncipherlast    $out6,$out6,$ivec
1148         vncipherlast    $out7,$out7,$in6
1149         vmr             $ivec,$in7
1150
1151         le?vperm        $out6,$out6,$out6,$inpperm
1152         le?vperm        $out7,$out7,$out7,$inpperm
1153         stvx_u          $out6,$x00,$out
1154         stvx_u          $out7,$x10,$out
1155         addi            $out,$out,0x20
1156         b               Lcbc_dec8x_done
1157
1158 .align  5
1159 Lcbc_dec8x_one:
1160         vncipherlast    $out7,$out7,$ivec
1161         vmr             $ivec,$in7
1162
1163         le?vperm        $out7,$out7,$out7,$inpperm
1164         stvx_u          $out7,0,$out
1165         addi            $out,$out,0x10
1166
1167 Lcbc_dec8x_done:
1168         le?vperm        $ivec,$ivec,$ivec,$inpperm
1169         stvx_u          $ivec,0,$ivp            # write [unaligned] iv
1170
1171         li              r10,`$FRAME+15`
1172         li              r11,`$FRAME+31`
1173         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1174         addi            r10,r10,32
1175         stvx            $inpperm,r11,$sp
1176         addi            r11,r11,32
1177         stvx            $inpperm,r10,$sp
1178         addi            r10,r10,32
1179         stvx            $inpperm,r11,$sp
1180         addi            r11,r11,32
1181         stvx            $inpperm,r10,$sp
1182         addi            r10,r10,32
1183         stvx            $inpperm,r11,$sp
1184         addi            r11,r11,32
1185         stvx            $inpperm,r10,$sp
1186         addi            r10,r10,32
1187         stvx            $inpperm,r11,$sp
1188         addi            r11,r11,32
1189
1190         mtspr           256,$vrsave
1191         lvx             v20,r10,$sp             # ABI says so
1192         addi            r10,r10,32
1193         lvx             v21,r11,$sp
1194         addi            r11,r11,32
1195         lvx             v22,r10,$sp
1196         addi            r10,r10,32
1197         lvx             v23,r11,$sp
1198         addi            r11,r11,32
1199         lvx             v24,r10,$sp
1200         addi            r10,r10,32
1201         lvx             v25,r11,$sp
1202         addi            r11,r11,32
1203         lvx             v26,r10,$sp
1204         addi            r10,r10,32
1205         lvx             v27,r11,$sp
1206         addi            r11,r11,32
1207         lvx             v28,r10,$sp
1208         addi            r10,r10,32
1209         lvx             v29,r11,$sp
1210         addi            r11,r11,32
1211         lvx             v30,r10,$sp
1212         lvx             v31,r11,$sp
1213         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1214         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1215         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1216         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1217         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1218         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1219         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1220         blr
1221         .long           0
1222         .byte           0,12,0x14,0,0x80,6,6,0
1223         .long           0
1224 .size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1225 ___
1226 }}      }}}
1227
1228 #########################################################################
1229 {{{     # CTR procedure[s]                                              #
1230 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1231 my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
1232 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1233                                                 map("v$_",(4..11));
1234 my $dat=$tmp;
1235
1236 $code.=<<___;
1237 .globl  .${prefix}_ctr32_encrypt_blocks
1238         ${UCMP}i        $len,1
1239         bltlr-
1240
1241         lis             r0,0xfff0
1242         mfspr           $vrsave,256
1243         mtspr           256,r0
1244
1245         li              $idx,15
1246         vxor            $rndkey0,$rndkey0,$rndkey0
1247         le?vspltisb     $tmp,0x0f
1248
1249         lvx             $ivec,0,$ivp            # load [unaligned] iv
1250         lvsl            $inpperm,0,$ivp
1251         lvx             $inptail,$idx,$ivp
1252          vspltisb       $one,1
1253         le?vxor         $inpperm,$inpperm,$tmp
1254         vperm           $ivec,$ivec,$inptail,$inpperm
1255          vsldoi         $one,$rndkey0,$one,1
1256
1257         neg             r11,$inp
1258         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
1259         lwz             $rounds,240($key)
1260
1261         lvsr            $inpperm,0,r11          # prepare for unaligned load
1262         lvx             $inptail,0,$inp
1263         addi            $inp,$inp,15            # 15 is not typo
1264         le?vxor         $inpperm,$inpperm,$tmp
1265
1266         srwi            $rounds,$rounds,1
1267         li              $idx,16
1268         subi            $rounds,$rounds,1
1269
1270         ${UCMP}i        $len,8
1271         bge             _aesp8_ctr32_encrypt8x
1272
1273         ?lvsr           $outperm,0,$out         # prepare for unaligned store
1274         vspltisb        $outmask,-1
1275         lvx             $outhead,0,$out
1276         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1277         le?vxor         $outperm,$outperm,$tmp
1278
1279         lvx             $rndkey0,0,$key
1280         mtctr           $rounds
1281         lvx             $rndkey1,$idx,$key
1282         addi            $idx,$idx,16
1283         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1284         vxor            $inout,$ivec,$rndkey0
1285         lvx             $rndkey0,$idx,$key
1286         addi            $idx,$idx,16
1287         b               Loop_ctr32_enc
1288
1289 .align  5
1290 Loop_ctr32_enc:
1291         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1292         vcipher         $inout,$inout,$rndkey1
1293         lvx             $rndkey1,$idx,$key
1294         addi            $idx,$idx,16
1295         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1296         vcipher         $inout,$inout,$rndkey0
1297         lvx             $rndkey0,$idx,$key
1298         addi            $idx,$idx,16
1299         bdnz            Loop_ctr32_enc
1300
1301         vadduwm         $ivec,$ivec,$one
1302          vmr            $dat,$inptail
1303          lvx            $inptail,0,$inp
1304          addi           $inp,$inp,16
1305          subic.         $len,$len,1             # blocks--
1306
1307         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1308         vcipher         $inout,$inout,$rndkey1
1309         lvx             $rndkey1,$idx,$key
1310          vperm          $dat,$dat,$inptail,$inpperm
1311          li             $idx,16
1312         ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
1313          lvx            $rndkey0,0,$key
1314         vxor            $dat,$dat,$rndkey1      # last round key
1315         vcipherlast     $inout,$inout,$dat
1316
1317          lvx            $rndkey1,$idx,$key
1318          addi           $idx,$idx,16
1319         vperm           $inout,$inout,$inout,$outperm
1320         vsel            $dat,$outhead,$inout,$outmask
1321          mtctr          $rounds
1322          ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
1323         vmr             $outhead,$inout
1324          vxor           $inout,$ivec,$rndkey0
1325          lvx            $rndkey0,$idx,$key
1326          addi           $idx,$idx,16
1327         stvx            $dat,0,$out
1328         addi            $out,$out,16
1329         bne             Loop_ctr32_enc
1330
1331         addi            $out,$out,-1
1332         lvx             $inout,0,$out           # redundant in aligned case
1333         vsel            $inout,$outhead,$inout,$outmask
1334         stvx            $inout,0,$out
1335
1336         mtspr           256,$vrsave
1337         blr
1338         .long           0
1339         .byte           0,12,0x14,0,0,0,6,0
1340         .long           0
1341 ___
1342 #########################################################################
1343 {{      # Optimized CTR procedure                                       #
1344 my $key_="r11";
1345 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1346 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1347 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1348 my $rndkey0="v23";      # v24-v25 rotating buffer for first found keys
1349                         # v26-v31 last 6 round keys
1350 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1351 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1352
1353 $code.=<<___;
1354 .align  5
1355 _aesp8_ctr32_encrypt8x:
1356         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1357         li              r10,`$FRAME+8*16+15`
1358         li              r11,`$FRAME+8*16+31`
1359         stvx            v20,r10,$sp             # ABI says so
1360         addi            r10,r10,32
1361         stvx            v21,r11,$sp
1362         addi            r11,r11,32
1363         stvx            v22,r10,$sp
1364         addi            r10,r10,32
1365         stvx            v23,r11,$sp
1366         addi            r11,r11,32
1367         stvx            v24,r10,$sp
1368         addi            r10,r10,32
1369         stvx            v25,r11,$sp
1370         addi            r11,r11,32
1371         stvx            v26,r10,$sp
1372         addi            r10,r10,32
1373         stvx            v27,r11,$sp
1374         addi            r11,r11,32
1375         stvx            v28,r10,$sp
1376         addi            r10,r10,32
1377         stvx            v29,r11,$sp
1378         addi            r11,r11,32
1379         stvx            v30,r10,$sp
1380         stvx            v31,r11,$sp
1381         li              r0,-1
1382         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1383         li              $x10,0x10
1384         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1385         li              $x20,0x20
1386         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1387         li              $x30,0x30
1388         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1389         li              $x40,0x40
1390         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1391         li              $x50,0x50
1392         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1393         li              $x60,0x60
1394         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1395         li              $x70,0x70
1396         mtspr           256,r0
1397
1398         subi            $rounds,$rounds,3       # -4 in total
1399
1400         lvx             $rndkey0,$x00,$key      # load key schedule
1401         lvx             v30,$x10,$key
1402         addi            $key,$key,0x20
1403         lvx             v31,$x00,$key
1404         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1405         addi            $key_,$sp,$FRAME+15
1406         mtctr           $rounds
1407
1408 Load_ctr32_enc_key:
1409         ?vperm          v24,v30,v31,$keyperm
1410         lvx             v30,$x10,$key
1411         addi            $key,$key,0x20
1412         stvx            v24,$x00,$key_          # off-load round[1]
1413         ?vperm          v25,v31,v30,$keyperm
1414         lvx             v31,$x00,$key
1415         stvx            v25,$x10,$key_          # off-load round[2]
1416         addi            $key_,$key_,0x20
1417         bdnz            Load_ctr32_enc_key
1418
1419         lvx             v26,$x10,$key
1420         ?vperm          v24,v30,v31,$keyperm
1421         lvx             v27,$x20,$key
1422         stvx            v24,$x00,$key_          # off-load round[3]
1423         ?vperm          v25,v31,v26,$keyperm
1424         lvx             v28,$x30,$key
1425         stvx            v25,$x10,$key_          # off-load round[4]
1426         addi            $key_,$sp,$FRAME+15     # rewind $key_
1427         ?vperm          v26,v26,v27,$keyperm
1428         lvx             v29,$x40,$key
1429         ?vperm          v27,v27,v28,$keyperm
1430         lvx             v30,$x50,$key
1431         ?vperm          v28,v28,v29,$keyperm
1432         lvx             v31,$x60,$key
1433         ?vperm          v29,v29,v30,$keyperm
1434         lvx             $out0,$x70,$key         # borrow $out0
1435         ?vperm          v30,v30,v31,$keyperm
1436         lvx             v24,$x00,$key_          # pre-load round[1]
1437         ?vperm          v31,v31,$out0,$keyperm
1438         lvx             v25,$x10,$key_          # pre-load round[2]
1439
1440         vadduwm         $two,$one,$one
1441         subi            $inp,$inp,15            # undo "caller"
1442         $SHL            $len,$len,4
1443
1444         vadduwm         $out1,$ivec,$one        # counter values ...
1445         vadduwm         $out2,$ivec,$two
1446         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1447          le?li          $idx,8
1448         vadduwm         $out3,$out1,$two
1449         vxor            $out1,$out1,$rndkey0
1450          le?lvsl        $inpperm,0,$idx
1451         vadduwm         $out4,$out2,$two
1452         vxor            $out2,$out2,$rndkey0
1453          le?vspltisb    $tmp,0x0f
1454         vadduwm         $out5,$out3,$two
1455         vxor            $out3,$out3,$rndkey0
1456          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1457         vadduwm         $out6,$out4,$two
1458         vxor            $out4,$out4,$rndkey0
1459         vadduwm         $out7,$out5,$two
1460         vxor            $out5,$out5,$rndkey0
1461         vadduwm         $ivec,$out6,$two        # next counter value
1462         vxor            $out6,$out6,$rndkey0
1463         vxor            $out7,$out7,$rndkey0
1464
1465         mtctr           $rounds
1466         b               Loop_ctr32_enc8x
1467 .align  5
1468 Loop_ctr32_enc8x:
1469         vcipher         $out0,$out0,v24
1470         vcipher         $out1,$out1,v24
1471         vcipher         $out2,$out2,v24
1472         vcipher         $out3,$out3,v24
1473         vcipher         $out4,$out4,v24
1474         vcipher         $out5,$out5,v24
1475         vcipher         $out6,$out6,v24
1476         vcipher         $out7,$out7,v24
1477 Loop_ctr32_enc8x_middle:
1478         lvx             v24,$x20,$key_          # round[3]
1479         addi            $key_,$key_,0x20
1480
1481         vcipher         $out0,$out0,v25
1482         vcipher         $out1,$out1,v25
1483         vcipher         $out2,$out2,v25
1484         vcipher         $out3,$out3,v25
1485         vcipher         $out4,$out4,v25
1486         vcipher         $out5,$out5,v25
1487         vcipher         $out6,$out6,v25
1488         vcipher         $out7,$out7,v25
1489         lvx             v25,$x10,$key_          # round[4]
1490         bdnz            Loop_ctr32_enc8x
1491
1492         subic           r11,$len,256            # $len-256, borrow $key_
1493         vcipher         $out0,$out0,v24
1494         vcipher         $out1,$out1,v24
1495         vcipher         $out2,$out2,v24
1496         vcipher         $out3,$out3,v24
1497         vcipher         $out4,$out4,v24
1498         vcipher         $out5,$out5,v24
1499         vcipher         $out6,$out6,v24
1500         vcipher         $out7,$out7,v24
1501
1502         subfe           r0,r0,r0                # borrow?-1:0
1503         vcipher         $out0,$out0,v25
1504         vcipher         $out1,$out1,v25
1505         vcipher         $out2,$out2,v25
1506         vcipher         $out3,$out3,v25
1507         vcipher         $out4,$out4,v25
1508         vcipher         $out5,$out5,v25
1509         vcipher         $out6,$out6,v25
1510         vcipher         $out7,$out7,v25
1511
1512         and             r0,r0,r11
1513         addi            $key_,$sp,$FRAME+15     # rewind $key_
1514         vcipher         $out0,$out0,v26
1515         vcipher         $out1,$out1,v26
1516         vcipher         $out2,$out2,v26
1517         vcipher         $out3,$out3,v26
1518         vcipher         $out4,$out4,v26
1519         vcipher         $out5,$out5,v26
1520         vcipher         $out6,$out6,v26
1521         vcipher         $out7,$out7,v26
1522         lvx             v24,$x00,$key_          # re-pre-load round[1]
1523
1524         subic           $len,$len,129           # $len-=129
1525         vcipher         $out0,$out0,v27
1526         addi            $len,$len,1             # $len-=128 really
1527         vcipher         $out1,$out1,v27
1528         vcipher         $out2,$out2,v27
1529         vcipher         $out3,$out3,v27
1530         vcipher         $out4,$out4,v27
1531         vcipher         $out5,$out5,v27
1532         vcipher         $out6,$out6,v27
1533         vcipher         $out7,$out7,v27
1534         lvx             v25,$x10,$key_          # re-pre-load round[2]
1535
1536         vcipher         $out0,$out0,v28
1537          lvx_u          $in0,$x00,$inp          # load input
1538         vcipher         $out1,$out1,v28
1539          lvx_u          $in1,$x10,$inp
1540         vcipher         $out2,$out2,v28
1541          lvx_u          $in2,$x20,$inp
1542         vcipher         $out3,$out3,v28
1543          lvx_u          $in3,$x30,$inp
1544         vcipher         $out4,$out4,v28
1545          lvx_u          $in4,$x40,$inp
1546         vcipher         $out5,$out5,v28
1547          lvx_u          $in5,$x50,$inp
1548         vcipher         $out6,$out6,v28
1549          lvx_u          $in6,$x60,$inp
1550         vcipher         $out7,$out7,v28
1551          lvx_u          $in7,$x70,$inp
1552          addi           $inp,$inp,0x80
1553
1554         vcipher         $out0,$out0,v29
1555          le?vperm       $in0,$in0,$in0,$inpperm
1556         vcipher         $out1,$out1,v29
1557          le?vperm       $in1,$in1,$in1,$inpperm
1558         vcipher         $out2,$out2,v29
1559          le?vperm       $in2,$in2,$in2,$inpperm
1560         vcipher         $out3,$out3,v29
1561          le?vperm       $in3,$in3,$in3,$inpperm
1562         vcipher         $out4,$out4,v29
1563          le?vperm       $in4,$in4,$in4,$inpperm
1564         vcipher         $out5,$out5,v29
1565          le?vperm       $in5,$in5,$in5,$inpperm
1566         vcipher         $out6,$out6,v29
1567          le?vperm       $in6,$in6,$in6,$inpperm
1568         vcipher         $out7,$out7,v29
1569          le?vperm       $in7,$in7,$in7,$inpperm
1570
1571         add             $inp,$inp,r0            # $inp is adjusted in such
1572                                                 # way that at exit from the
1573                                                 # loop inX-in7 are loaded
1574                                                 # with last "words"
1575         subfe.          r0,r0,r0                # borrow?-1:0
1576         vcipher         $out0,$out0,v30
1577          vxor           $in0,$in0,v31           # xor with last round key
1578         vcipher         $out1,$out1,v30
1579          vxor           $in1,$in1,v31
1580         vcipher         $out2,$out2,v30
1581          vxor           $in2,$in2,v31
1582         vcipher         $out3,$out3,v30
1583          vxor           $in3,$in3,v31
1584         vcipher         $out4,$out4,v30
1585          vxor           $in4,$in4,v31
1586         vcipher         $out5,$out5,v30
1587          vxor           $in5,$in5,v31
1588         vcipher         $out6,$out6,v30
1589          vxor           $in6,$in6,v31
1590         vcipher         $out7,$out7,v30
1591          vxor           $in7,$in7,v31
1592
1593         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1594
1595         vcipherlast     $in0,$out0,$in0
1596         vcipherlast     $in1,$out1,$in1
1597          vadduwm        $out1,$ivec,$one        # counter values ...
1598         vcipherlast     $in2,$out2,$in2
1599          vadduwm        $out2,$ivec,$two
1600          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1601         vcipherlast     $in3,$out3,$in3
1602          vadduwm        $out3,$out1,$two
1603          vxor           $out1,$out1,$rndkey0
1604         vcipherlast     $in4,$out4,$in4
1605          vadduwm        $out4,$out2,$two
1606          vxor           $out2,$out2,$rndkey0
1607         vcipherlast     $in5,$out5,$in5
1608          vadduwm        $out5,$out3,$two
1609          vxor           $out3,$out3,$rndkey0
1610         vcipherlast     $in6,$out6,$in6
1611          vadduwm        $out6,$out4,$two
1612          vxor           $out4,$out4,$rndkey0
1613         vcipherlast     $in7,$out7,$in7
1614          vadduwm        $out7,$out5,$two
1615          vxor           $out5,$out5,$rndkey0
1616         le?vperm        $in0,$in0,$in0,$inpperm
1617          vadduwm        $ivec,$out6,$two        # next counter value
1618          vxor           $out6,$out6,$rndkey0
1619         le?vperm        $in1,$in1,$in1,$inpperm
1620          vxor           $out7,$out7,$rndkey0
1621         mtctr           $rounds
1622
1623          vcipher        $out0,$out0,v24
1624         stvx_u          $in0,$x00,$out
1625         le?vperm        $in2,$in2,$in2,$inpperm
1626          vcipher        $out1,$out1,v24
1627         stvx_u          $in1,$x10,$out
1628         le?vperm        $in3,$in3,$in3,$inpperm
1629          vcipher        $out2,$out2,v24
1630         stvx_u          $in2,$x20,$out
1631         le?vperm        $in4,$in4,$in4,$inpperm
1632          vcipher        $out3,$out3,v24
1633         stvx_u          $in3,$x30,$out
1634         le?vperm        $in5,$in5,$in5,$inpperm
1635          vcipher        $out4,$out4,v24
1636         stvx_u          $in4,$x40,$out
1637         le?vperm        $in6,$in6,$in6,$inpperm
1638          vcipher        $out5,$out5,v24
1639         stvx_u          $in5,$x50,$out
1640         le?vperm        $in7,$in7,$in7,$inpperm
1641          vcipher        $out6,$out6,v24
1642         stvx_u          $in6,$x60,$out
1643          vcipher        $out7,$out7,v24
1644         stvx_u          $in7,$x70,$out
1645         addi            $out,$out,0x80
1646
1647         b               Loop_ctr32_enc8x_middle
1648
1649 .align  5
1650 Lctr32_enc8x_break:
1651         cmpwi           $len,-0x60
1652         blt             Lctr32_enc8x_one
1653         nop
1654         beq             Lctr32_enc8x_two
1655         cmpwi           $len,-0x40
1656         blt             Lctr32_enc8x_three
1657         nop
1658         beq             Lctr32_enc8x_four
1659         cmpwi           $len,-0x20
1660         blt             Lctr32_enc8x_five
1661         nop
1662         beq             Lctr32_enc8x_six
1663         cmpwi           $len,0x00
1664         blt             Lctr32_enc8x_seven
1665
1666 Lctr32_enc8x_eight:
1667         vcipherlast     $out0,$out0,$in0
1668         vcipherlast     $out1,$out1,$in1
1669         vcipherlast     $out2,$out2,$in2
1670         vcipherlast     $out3,$out3,$in3
1671         vcipherlast     $out4,$out4,$in4
1672         vcipherlast     $out5,$out5,$in5
1673         vcipherlast     $out6,$out6,$in6
1674         vcipherlast     $out7,$out7,$in7
1675
1676         le?vperm        $out0,$out0,$out0,$inpperm
1677         le?vperm        $out1,$out1,$out1,$inpperm
1678         stvx_u          $out0,$x00,$out
1679         le?vperm        $out2,$out2,$out2,$inpperm
1680         stvx_u          $out1,$x10,$out
1681         le?vperm        $out3,$out3,$out3,$inpperm
1682         stvx_u          $out2,$x20,$out
1683         le?vperm        $out4,$out4,$out4,$inpperm
1684         stvx_u          $out3,$x30,$out
1685         le?vperm        $out5,$out5,$out5,$inpperm
1686         stvx_u          $out4,$x40,$out
1687         le?vperm        $out6,$out6,$out6,$inpperm
1688         stvx_u          $out5,$x50,$out
1689         le?vperm        $out7,$out7,$out7,$inpperm
1690         stvx_u          $out6,$x60,$out
1691         stvx_u          $out7,$x70,$out
1692         addi            $out,$out,0x80
1693         b               Lctr32_enc8x_done
1694
1695 .align  5
1696 Lctr32_enc8x_seven:
1697         vcipherlast     $out0,$out0,$in1
1698         vcipherlast     $out1,$out1,$in2
1699         vcipherlast     $out2,$out2,$in3
1700         vcipherlast     $out3,$out3,$in4
1701         vcipherlast     $out4,$out4,$in5
1702         vcipherlast     $out5,$out5,$in6
1703         vcipherlast     $out6,$out6,$in7
1704
1705         le?vperm        $out0,$out0,$out0,$inpperm
1706         le?vperm        $out1,$out1,$out1,$inpperm
1707         stvx_u          $out0,$x00,$out
1708         le?vperm        $out2,$out2,$out2,$inpperm
1709         stvx_u          $out1,$x10,$out
1710         le?vperm        $out3,$out3,$out3,$inpperm
1711         stvx_u          $out2,$x20,$out
1712         le?vperm        $out4,$out4,$out4,$inpperm
1713         stvx_u          $out3,$x30,$out
1714         le?vperm        $out5,$out5,$out5,$inpperm
1715         stvx_u          $out4,$x40,$out
1716         le?vperm        $out6,$out6,$out6,$inpperm
1717         stvx_u          $out5,$x50,$out
1718         stvx_u          $out6,$x60,$out
1719         addi            $out,$out,0x70
1720         b               Lctr32_enc8x_done
1721
1722 .align  5
1723 Lctr32_enc8x_six:
1724         vcipherlast     $out0,$out0,$in2
1725         vcipherlast     $out1,$out1,$in3
1726         vcipherlast     $out2,$out2,$in4
1727         vcipherlast     $out3,$out3,$in5
1728         vcipherlast     $out4,$out4,$in6
1729         vcipherlast     $out5,$out5,$in7
1730
1731         le?vperm        $out0,$out0,$out0,$inpperm
1732         le?vperm        $out1,$out1,$out1,$inpperm
1733         stvx_u          $out0,$x00,$out
1734         le?vperm        $out2,$out2,$out2,$inpperm
1735         stvx_u          $out1,$x10,$out
1736         le?vperm        $out3,$out3,$out3,$inpperm
1737         stvx_u          $out2,$x20,$out
1738         le?vperm        $out4,$out4,$out4,$inpperm
1739         stvx_u          $out3,$x30,$out
1740         le?vperm        $out5,$out5,$out5,$inpperm
1741         stvx_u          $out4,$x40,$out
1742         stvx_u          $out5,$x50,$out
1743         addi            $out,$out,0x60
1744         b               Lctr32_enc8x_done
1745
1746 .align  5
1747 Lctr32_enc8x_five:
1748         vcipherlast     $out0,$out0,$in3
1749         vcipherlast     $out1,$out1,$in4
1750         vcipherlast     $out2,$out2,$in5
1751         vcipherlast     $out3,$out3,$in6
1752         vcipherlast     $out4,$out4,$in7
1753
1754         le?vperm        $out0,$out0,$out0,$inpperm
1755         le?vperm        $out1,$out1,$out1,$inpperm
1756         stvx_u          $out0,$x00,$out
1757         le?vperm        $out2,$out2,$out2,$inpperm
1758         stvx_u          $out1,$x10,$out
1759         le?vperm        $out3,$out3,$out3,$inpperm
1760         stvx_u          $out2,$x20,$out
1761         le?vperm        $out4,$out4,$out4,$inpperm
1762         stvx_u          $out3,$x30,$out
1763         stvx_u          $out4,$x40,$out
1764         addi            $out,$out,0x50
1765         b               Lctr32_enc8x_done
1766
1767 .align  5
1768 Lctr32_enc8x_four:
1769         vcipherlast     $out0,$out0,$in4
1770         vcipherlast     $out1,$out1,$in5
1771         vcipherlast     $out2,$out2,$in6
1772         vcipherlast     $out3,$out3,$in7
1773
1774         le?vperm        $out0,$out0,$out0,$inpperm
1775         le?vperm        $out1,$out1,$out1,$inpperm
1776         stvx_u          $out0,$x00,$out
1777         le?vperm        $out2,$out2,$out2,$inpperm
1778         stvx_u          $out1,$x10,$out
1779         le?vperm        $out3,$out3,$out3,$inpperm
1780         stvx_u          $out2,$x20,$out
1781         stvx_u          $out3,$x30,$out
1782         addi            $out,$out,0x40
1783         b               Lctr32_enc8x_done
1784
1785 .align  5
1786 Lctr32_enc8x_three:
1787         vcipherlast     $out0,$out0,$in5
1788         vcipherlast     $out1,$out1,$in6
1789         vcipherlast     $out2,$out2,$in7
1790
1791         le?vperm        $out0,$out0,$out0,$inpperm
1792         le?vperm        $out1,$out1,$out1,$inpperm
1793         stvx_u          $out0,$x00,$out
1794         le?vperm        $out2,$out2,$out2,$inpperm
1795         stvx_u          $out1,$x10,$out
1796         stvx_u          $out2,$x20,$out
1797         addi            $out,$out,0x30
1798         b               Lcbc_dec8x_done
1799
1800 .align  5
1801 Lctr32_enc8x_two:
1802         vcipherlast     $out0,$out0,$in6
1803         vcipherlast     $out1,$out1,$in7
1804
1805         le?vperm        $out0,$out0,$out0,$inpperm
1806         le?vperm        $out1,$out1,$out1,$inpperm
1807         stvx_u          $out0,$x00,$out
1808         stvx_u          $out1,$x10,$out
1809         addi            $out,$out,0x20
1810         b               Lcbc_dec8x_done
1811
1812 .align  5
1813 Lctr32_enc8x_one:
1814         vcipherlast     $out0,$out0,$in7
1815
1816         le?vperm        $out0,$out0,$out0,$inpperm
1817         stvx_u          $out0,0,$out
1818         addi            $out,$out,0x10
1819
1820 Lctr32_enc8x_done:
1821         li              r10,`$FRAME+15`
1822         li              r11,`$FRAME+31`
1823         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1824         addi            r10,r10,32
1825         stvx            $inpperm,r11,$sp
1826         addi            r11,r11,32
1827         stvx            $inpperm,r10,$sp
1828         addi            r10,r10,32
1829         stvx            $inpperm,r11,$sp
1830         addi            r11,r11,32
1831         stvx            $inpperm,r10,$sp
1832         addi            r10,r10,32
1833         stvx            $inpperm,r11,$sp
1834         addi            r11,r11,32
1835         stvx            $inpperm,r10,$sp
1836         addi            r10,r10,32
1837         stvx            $inpperm,r11,$sp
1838         addi            r11,r11,32
1839
1840         mtspr           256,$vrsave
1841         lvx             v20,r10,$sp             # ABI says so
1842         addi            r10,r10,32
1843         lvx             v21,r11,$sp
1844         addi            r11,r11,32
1845         lvx             v22,r10,$sp
1846         addi            r10,r10,32
1847         lvx             v23,r11,$sp
1848         addi            r11,r11,32
1849         lvx             v24,r10,$sp
1850         addi            r10,r10,32
1851         lvx             v25,r11,$sp
1852         addi            r11,r11,32
1853         lvx             v26,r10,$sp
1854         addi            r10,r10,32
1855         lvx             v27,r11,$sp
1856         addi            r11,r11,32
1857         lvx             v28,r10,$sp
1858         addi            r10,r10,32
1859         lvx             v29,r11,$sp
1860         addi            r11,r11,32
1861         lvx             v30,r10,$sp
1862         lvx             v31,r11,$sp
1863         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1864         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1865         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1866         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1867         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1868         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1869         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1870         blr
1871         .long           0
1872         .byte           0,12,0x14,0,0x80,6,6,0
1873         .long           0
1874 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1875 ___
1876 }}      }}}
1877
1878 my $consts=1;
1879 foreach(split("\n",$code)) {
1880         s/\`([^\`]*)\`/eval($1)/geo;
1881
1882         # constants table endian-specific conversion
1883         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1884             my $conv=$3;
1885             my @bytes=();
1886
1887             # convert to endian-agnostic format
1888             if ($1 eq "long") {
1889               foreach (split(/,\s*/,$2)) {
1890                 my $l = /^0/?oct:int;
1891                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1892               }
1893             } else {
1894                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1895             }
1896
1897             # little-endian conversion
1898             if ($flavour =~ /le$/o) {
1899                 SWITCH: for($conv)  {
1900                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
1901                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
1902                 }
1903             }
1904
1905             #emit
1906             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1907             next;
1908         }
1909         $consts=0 if (m/Lconsts:/o);    # end of table
1910
1911         # instructions prefixed with '?' are endian-specific and need
1912         # to be adjusted accordingly...
1913         if ($flavour =~ /le$/o) {       # little-endian
1914             s/le\?//o           or
1915             s/be\?/#be#/o       or
1916             s/\?lvsr/lvsl/o     or
1917             s/\?lvsl/lvsr/o     or
1918             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1919             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1920             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1921         } else {                        # big-endian
1922             s/le\?/#le#/o       or
1923             s/be\?//o           or
1924             s/\?([a-z]+)/$1/o;
1925         }
1926
1927         print $_,"\n";
1928 }
1929
1930 close STDOUT;