@ Imported from the kvmfornfv tree (rt-linux 4.1.3-rt3 base):
@ kernel/arch/arm/crypto/ghash-ce-core.S
1 /*
2  * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
3  *
4  * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation.
9  */
10
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
13
        @ Q-register (128-bit) aliases for the GF(2^128) multiply below.
        @ NOTE: XH and IN1 both alias q7 — IN1 is fully consumed before XH
        @ is produced, so the overlap is deliberate and safe.
14         SHASH           .req    q0      @ hash key H, loaded from r3
15         SHASH2          .req    q1      @ lo half = H.lo ^ H.hi (Karatsuba key)
16         T1              .req    q2      @ scratch / current input block
17         T2              .req    q3      @ scratch
18         MASK            .req    q4      @ reduction constant, 0xe1 << 57
19         XL              .req    q5      @ digest / low part of product
20         XM              .req    q6      @ Karatsuba middle term
21         XH              .req    q7      @ high part of product
22         IN1             .req    q7      @ rotated input block (overlaps XH)
23
        @ D-register (64-bit) views of the registers above; qN maps to
        @ d(2N) (low half) and d(2N+1) (high half).
24         SHASH_L         .req    d0
25         SHASH_H         .req    d1
26         SHASH2_L        .req    d2
27         T1_L            .req    d4
28         MASK_L          .req    d8
29         XL_L            .req    d10
30         XL_H            .req    d11
31         XM_L            .req    d12
32         XM_H            .req    d13
33         XH_L            .req    d14
34
35         .text
36         .fpu            crypto-neon-fp-armv8    @ enables vmull.p64 (Crypto Extensions)
37
38         /*
39          * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
40          *                         struct ghash_key const *k, const char *head)
41          */
42 ENTRY(pmull_ghash_update)
        @ AAPCS arguments:
        @   r0   = blocks: number of full 16-byte blocks at src
        @   r1   = dg:     128-bit running digest, loaded here and stored back
        @   r2   = src:    message data, advanced as blocks are consumed
        @   r3   = k:      struct ghash_key holding the hash key H
        @   [sp] = head:   optional pointer to one extra block to process
        @                  before src (0/NULL if none)
43         vld1.64         {SHASH}, [r3]           @ SHASH = hash key H
44         vld1.64         {XL}, [r1]              @ XL = current digest
45         vmov.i8         MASK, #0xe1
46         vext.8          SHASH2, SHASH, SHASH, #8
47         vshl.u64        MASK, MASK, #57         @ MASK = 0xe1 << 57, reduction constant
48         veor            SHASH2, SHASH2, SHASH   @ SHASH2.lo = H.lo ^ H.hi (Karatsuba)
49
50         /* do the head block first, if supplied */
51         ldr             ip, [sp]                @ ip = head (5th arg, on the stack)
52         teq             ip, #0
53         beq             0f                      @ no head block -> start main loop
54         vld1.64         {T1}, [ip]              @ T1 = head block
55         teq             r0, #0                  @ prime Z for the 'bne 0b' at the
                                                   @ bottom: NEON insns below do not
                                                   @ touch the CPSR flags
56         b               1f
57
58 0:      vld1.64         {T1}, [r2]!             @ T1 = next block, post-increment src
59         subs            r0, r0, #1              @ --blocks; flags feed 'bne 0b'
60
61 1:      /* multiply XL by SHASH in GF(2^128) */
62 #ifndef CONFIG_CPU_BIG_ENDIAN
63         vrev64.8        T1, T1                  @ byte-swap block on little-endian
64 #endif
        @ Fold the input block into the digest, then carry-less multiply by H
        @ using a Karatsuba split: 3 vmull.p64 instead of 4.
65         vext.8          T2, XL, XL, #8
66         vext.8          IN1, T1, T1, #8
67         veor            T1, T1, T2
68         veor            XL, XL, IN1             @ XL = digest ^ block = operand b
69
70         vmull.p64       XH, SHASH_H, XL_H               @ a1 * b1
71         veor            T1, T1, XL              @ T1.lo = b1 ^ b0
72         vmull.p64       XL, SHASH_L, XL_L               @ a0 * b0
73         vmull.p64       XM, SHASH2_L, T1_L              @ (a1 + a0)(b1 + b0)
74
        @ Combine the three partial products into a 256-bit result in XH:XL,
        @ folding the middle term XM into the overlap.
75         vext.8          T1, XL, XH, #8
76         veor            T2, XL, XH
77         veor            XM, XM, T1
78         veor            XM, XM, T2              @ XM = middle 128 bits of product
79         vmull.p64       T2, XL_L, MASK_L        @ 1st reduction step (mod GHASH poly)
80
81         vmov            XH_L, XM_H              @ shift XM into the XH:XL halves
82         vmov            XM_H, XL_L
83
84         veor            XL, XM, T2
85         vext.8          T2, XL, XL, #8
86         vmull.p64       XL, XL_L, MASK_L        @ 2nd reduction step
87         veor            T2, T2, XH
88         veor            XL, XL, T2              @ XL = reduced 128-bit digest
89
90         bne             0b                      @ loop while blocks remain (flags
                                                   @ set by subs/teq above)
91
92         vst1.64         {XL}, [r1]              @ write updated digest back to dg
93         bx              lr
94 ENDPROC(pmull_ghash_update)