Add the RT Linux 4.1.3-rt3 kernel as base
diff --git a/kernel/arch/c6x/lib/csum_64plus.S b/kernel/arch/c6x/lib/csum_64plus.S
new file mode 100644
index 0000000..6d25896
--- /dev/null
@@ -0,0 +1,419 @@
+;
+;  linux/arch/c6x/lib/csum_64plus.S
+;
+;  Port on Texas Instruments TMS320C6x architecture
+;
+;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+;  This program is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License version 2 as
+;  published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+
+;
+;unsigned int csum_partial_copy(const char *src, char *dst,
+;                              int len, int sum)
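+;{
+;      /* Hedged reference sketch, not part of the original file: the
+;         assembly below performs this copy-and-sum, using LDNW/STNW
+;         for misaligned buffers. */
+;      unsigned int csum = 0;
+;      int i = 0;
+;
+;      for (; i + 1 < len; i += 2) {   /* copy and sum halfwords */
+;              unsigned short w = *(unsigned short *)(src + i);
+;              *(unsigned short *)(dst + i) = w;
+;              csum += w;
+;      }
+;      if (len & 1) {                  /* trailing byte (little endian;
+;                                         big endian adds src[i] << 8) */
+;              dst[i] = src[i];
+;              csum += (unsigned char) src[i];
+;      }
+;      while (csum >> 16)              /* fold carries into 16 bits */
+;              csum = (csum & 0xffff) + (csum >> 16);
+;      return csum + sum;              /* add the caller's sum */
+;}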
+;
+; A4:  src
+; B4:  dst
+; A6:  len
+; B6:  sum
+; return csum in A4
+;
+
+       .text
+ENTRY(csum_partial_copy)
+       MVC     .S2     ILC,B30
+
+       MV      .D1X    B6,A31          ; given csum
+       ZERO    .D1     A9              ; csum (a side)
+||     ZERO    .D2     B9              ; csum (b side)
+||     SHRU    .S2X    A6,2,B5         ; len / 4
+
+       ;; Check alignment and size
+       AND     .S1     3,A4,A1
+||     AND     .S2     3,B4,B0
+       OR      .L2X    B0,A1,B0        ; non-aligned condition
+||     MVC     .S2     B5,ILC
+||     MVK     .D2     1,B2
+||     MV      .D1X    B5,A1           ; words condition
+  [!A1]        B       .S1     L8
+   [B0] BNOP   .S1     L6,5
+
+       SPLOOP          1
+
+       ;; Main loop for aligned words
+       LDW     .D1T1   *A4++,A7
+       NOP     4
+       MV      .S2X    A7,B7
+||     EXTU    .S1     A7,0,16,A16
+       STW     .D2T2   B7,*B4++
+||     MPYU    .M2     B7,B2,B8
+||     ADD     .L1     A16,A9,A9
+       NOP
+       SPKERNEL        8,0
+||     ADD     .L2     B8,B9,B9
+
+       ZERO    .D1     A1
+||     ADD     .L1X    A9,B9,A9        ;  add csum from a and b sides
+
+L6:
+  [!A1]        BNOP    .S1     L8,5
+
+       ;; Main loop for non-aligned words
+       SPLOOP          2
+ ||    MVK     .L1     1,A2
+
+       LDNW    .D1T1   *A4++,A7
+       NOP             3
+
+       NOP
+       MV      .S2X    A7,B7
+ ||    EXTU    .S1     A7,0,16,A16
+ ||    MPYU    .M1     A7,A2,A8
+
+       ADD     .L1     A16,A9,A9
+       SPKERNEL        6,0
+ ||    STNW    .D2T2   B7,*B4++
+ ||    ADD     .L1     A8,A9,A9
+
+L8:    AND     .S2X    2,A6,B5
+       CMPGT   .L2     B5,0,B0
+  [!B0]        BNOP    .S1     L82,4
+
+       ;; Handle the trailing half-word
+       ZERO    .L1     A7
+||     ZERO    .D1     A8
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+       LDBU    .D1T1   *A4++,A7
+       LDBU    .D1T1   *A4++,A8
+       NOP             3
+       SHL     .S1     A7,8,A0
+       ADD     .S1     A8,A9,A9
+       STB     .D2T1   A7,*B4++
+||     ADD     .S1     A0,A9,A9
+       STB     .D2T1   A8,*B4++
+
+#else
+
+       LDBU    .D1T1   *A4++,A7
+       LDBU    .D1T1   *A4++,A8
+       NOP             3
+       ADD     .S1     A7,A9,A9
+       SHL     .S1     A8,8,A0
+
+       STB     .D2T1   A7,*B4++
+||     ADD     .S1     A0,A9,A9
+       STB     .D2T1   A8,*B4++
+
+#endif
+
+       ;; Handle the last byte, if any
+L82:   AND     .S2X    1,A6,B0
+  [!B0]        BNOP    .S1     L9,5
+||     ZERO    .L1     A7
+
+L83:   LDBU    .D1T1   *A4++,A7
+       NOP             4
+
+       MV      .L2X    A7,B7
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+       STB     .D2T2   B7,*B4++
+||     SHL     .S1     A7,8,A7
+       ADD     .S1     A7,A9,A9
+
+#else
+
+       STB     .D2T2   B7,*B4++
+||     ADD     .S1     A7,A9,A9
+
+#endif
+
+       ;; Fold the csum
+L9:    SHRU    .S2X    A9,16,B0
+  [!B0]        BNOP    .S1     L10,5
+
+L91:   SHRU    .S2X    A9,16,B4
+||     EXTU    .S1     A9,16,16,A3
+       ADD     .D1X    A3,B4,A9
+
+       SHRU    .S1     A9,16,A0
+   [A0]        BNOP    .S1     L91,5
+
+L10:   ADD     .D1     A31,A9,A9
+       MV      .D1     A9,A4
+
+       BNOP    .S2     B3,4
+       MVC     .S2     B30,ILC
+ENDPROC(csum_partial_copy)
+
+;
+;unsigned short
+;ip_fast_csum(unsigned char *iph, unsigned int ihl)
+;{
+;      unsigned int checksum = 0;
+;      unsigned short *tosum = (unsigned short *) iph;
+;      int len;
+;
+;      len = ihl*4;
+;
+;      if (len <= 0)
+;              return 0;
+;
+;      while (len > 1) {
+;              len -= 2;
+;              checksum += *tosum++;
+;      }
+;      if (len & 1)
+;              checksum += *(unsigned char*) tosum;
+;
+;      while(checksum >> 16)
+;              checksum = (checksum & 0xffff) + (checksum >> 16);
+;
+;      return ~checksum;
+;}
+;
+; A4:  iph
+; B4:  ihl
+; return checksum in A4
+;
+       .text
+
+ENTRY(ip_fast_csum)
+       ZERO    .D1     A5
+ ||    MVC     .S2     ILC,B30
+       SHL     .S2     B4,2,B0
+       CMPGT   .L2     B0,0,B1
+  [!B1] BNOP   .S1     L15,4
+  [!B1]        ZERO    .D1     A3
+
+  [!B0]        B       .S1     L12
+       SHRU    .S2     B0,1,B0
+       MVC     .S2     B0,ILC
+       NOP     3
+
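+       ;; Sum the header halfwords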
+       SPLOOP  1
+       LDHU    .D1T1   *A4++,A3
+       NOP     3
+       NOP
+       SPKERNEL        5,0
+ ||    ADD     .L1     A3,A5,A5
+
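+       ;; Fold the 32-bit sum into 16 bits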
+L12:   SHRU    .S1     A5,16,A0
+  [!A0]        BNOP    .S1     L14,5
+
+L13:   SHRU    .S2X    A5,16,B4
+       EXTU    .S1     A5,16,16,A3
+       ADD     .D1X    A3,B4,A5
+       SHRU    .S1     A5,16,A0
+  [A0] BNOP    .S1     L13,5
+
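+       ;; One's complement, truncated to 16 bits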
+L14:   NOT     .D1     A5,A3
+       EXTU    .S1     A3,16,16,A3
+
+L15:   BNOP    .S2     B3,3
+       MVC     .S2     B30,ILC
+       MV      .D1     A3,A4
+ENDPROC(ip_fast_csum)
+
+;
+;unsigned short
+;do_csum(unsigned char *buff, unsigned int len)
+;{
+;      int odd, count;
+;      unsigned int result = 0;
+;
+;      if (len <= 0)
+;              goto out;
+;      odd = 1 & (unsigned long) buff;
+;      if (odd) {
+;#ifdef __LITTLE_ENDIAN
+;              result += (*buff << 8);
+;#else
+;              result = *buff;
+;#endif
+;              len--;
+;              buff++;
+;      }
+;      count = len >> 1;               /* nr of 16-bit words.. */
+;      if (count) {
+;              if (2 & (unsigned long) buff) {
+;                      result += *(unsigned short *) buff;
+;                      count--;
+;                      len -= 2;
+;                      buff += 2;
+;              }
+;              count >>= 1;            /* nr of 32-bit words.. */
+;              if (count) {
+;                      unsigned int carry = 0;
+;                      do {
+;                              unsigned int w = *(unsigned int *) buff;
+;                              count--;
+;                              buff += 4;
+;                              result += carry;
+;                              result += w;
+;                              carry = (w > result);
+;                      } while (count);
+;                      result += carry;
+;                      result = (result & 0xffff) + (result >> 16);
+;              }
+;              if (len & 2) {
+;                      result += *(unsigned short *) buff;
+;                      buff += 2;
+;              }
+;      }
+;      if (len & 1)
+;#ifdef __LITTLE_ENDIAN
+;              result += *buff;
+;#else
+;              result += (*buff << 8);
+;#endif
+;      result = (result & 0xffff) + (result >> 16);
+;      /* add up carry.. */
+;      result = (result & 0xffff) + (result >> 16);
+;      if (odd)
+;              result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+;out:
+;      return result;
+;}
+;
+; A4:  buff
+; B4:  len
+; return checksum in A4
+;
+
+ENTRY(do_csum)
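+          ;; Exit via L26 when len <= 0; A0 = bit 0 of buff (odd address)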
+          CMPGT   .L2     B4,0,B0
+   [!B0]   BNOP    .S1    L26,3
+          EXTU    .S1     A4,31,31,A0
+
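+          ;; A3 = odd flag, A5 = return address, B3 = len, A1 = checksum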
+          MV      .L1     A0,A3
+||        MV      .S1X    B3,A5
+||        MV      .L2     B4,B3
+||        ZERO    .D1     A1
+
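+          ;; Fold in the leading byte when buff starts on an odd address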
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [A0]    SUB    .L2     B3,1,B3
+|| [A0]    LDBU    .D1T1   *A4++,A1
+#else
+   [!A0]   BNOP    .S1    L21,5
+|| [A0]    LDBU    .D1T1   *A4++,A0
+          SUB     .L2     B3,1,B3
+||        SHL     .S1     A0,8,A1
+L21:
+#endif
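+          ;; B0 = number of halfwords; branch to L24 when none remain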
+          SHR     .S2     B3,1,B0
+   [!B0]   BNOP    .S1    L24,3
+          MVK     .L1     2,A0
+          AND     .L1     A4,A0,A0
+
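+          ;; Consume one halfword to reach 32-bit alignment, if needed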
+   [!A0]   BNOP    .S1    L22,5
+|| [A0]    LDHU    .D1T1   *A4++,A0
+          SUB     .L2     B0,1,B0
+||        SUB     .S2     B3,2,B3
+||        ADD     .L1     A0,A1,A1
+L22:
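+          ;; B0 = number of 32-bit words, A0 = carry accumulator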
+          SHR     .S2     B0,1,B0
+||        ZERO    .L1     A0
+
+   [!B0]   BNOP    .S1    L23,5
+|| [B0]    MVC    .S2     B0,ILC
+
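+          ;; Sum 32-bit words; CMPGTU catches the carry out of each add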
+          SPLOOP  3
+          SPMASK  L1
+||        MV      .L1     A1,A2
+||        LDW     .D1T1   *A4++,A1
+
+          NOP     4
+          ADD     .L1     A0,A1,A0
+          ADD     .L1     A2,A0,A2
+
+          SPKERNEL 1,2
+||        CMPGTU  .L1     A1,A2,A0
+
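+          ;; Add the last carry, then fold the 32-bit sum into 16 bits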
+          ADD     .L1     A0,A2,A6
+          EXTU    .S1     A6,16,16,A7
+          SHRU    .S2X    A6,16,B0
+          NOP             1
+          ADD     .L1X    A7,B0,A1
+L23:
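+          ;; Add the trailing halfword, if any (len & 2)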
+          MVK     .L2     2,B0
+          AND     .L2     B3,B0,B0
+   [B0]    LDHU    .D1T1   *A4++,A0
+          NOP     4
+   [B0]    ADD    .L1     A0,A1,A1
+L24:
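+          ;; Add the trailing byte, if any (len & 1)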
+          EXTU    .S2     B3,31,31,B0
+#ifdef CONFIG_CPU_BIG_ENDIAN
+   [!B0]   BNOP    .S1    L25,4
+|| [B0]    LDBU    .D1T1   *A4,A0
+          SHL     .S1     A0,8,A0
+          ADD     .L1     A0,A1,A1
+L25:
+#else
+   [B0]    LDBU    .D1T1   *A4,A0
+          NOP     4
+   [B0]    ADD    .L1     A0,A1,A1
+#endif
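+          ;; Fold the remaining carries into a 16-bit result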
+          EXTU    .S1     A1,16,16,A0
+          SHRU    .S2X    A1,16,B0
+          NOP     1
+          ADD     .L1X    A0,B0,A0
+          SHRU    .S1     A0,16,A1
+          ADD     .L1     A0,A1,A0
+          EXTU    .S1     A0,16,16,A1
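+          ;; Swap the result bytes when buff started on an odd address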
+          EXTU    .S1     A1,16,24,A2
+
+          EXTU    .S1     A1,24,16,A0
+||        MV      .L2X    A3,B0
+
+   [B0]    OR     .L1     A0,A2,A1
+L26:
+          NOP     1
+          BNOP    .S2X    A5,4
+          MV      .L1     A1,A4
+ENDPROC(do_csum)
+
+;__wsum csum_partial(const void *buff, int len, __wsum wsum)
+;{
+;      unsigned int sum = (__force unsigned int)wsum;
+;      unsigned int result = do_csum(buff, len);
+;
+;      /* add in old sum, and carry.. */
+;      result += sum;
+;      if (sum > result)
+;              result += 1;
+;      return (__force __wsum)result;
+;}
+;
+ENTRY(csum_partial)
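+          ;; A9 = return address, A8 = caller's wsum; fold it into the
+          ;; do_csum() result with an end-around carry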
+          MV      .L1X    B3,A9
+||        CALLP   .S2     do_csum,B3
+||        MV      .S1     A6,A8
+          BNOP    .S2X    A9,2
+          ADD     .L1     A8,A4,A1
+          CMPGTU  .L1     A8,A1,A0
+          ADD     .L1     A1,A0,A4
+ENDPROC(csum_partial)
+
+;
+;unsigned short
+;ip_compute_csum(unsigned char *buff, unsigned int len)
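+;{
+;      /* Hedged reference sketch, not part of the original file: the
+;         assembly below returns the one's complement of do_csum(),
+;         truncated to the low 16 bits. */
+;      return ~do_csum(buff, len) & 0xffff;
+;}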
+;
+; A4:  buff
+; B4:  len
+; return checksum in A4
+;
+
+ENTRY(ip_compute_csum)
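+          ;; Return ~do_csum(buff, len); CLR keeps only bits 0-15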
+          MV      .L1X    B3,A9
+||        CALLP   .S2     do_csum,B3
+          BNOP    .S2X    A9,3
+          NOT     .S1     A4,A4
+          CLR     .S1     A4,16,31,A4
+ENDPROC(ip_compute_csum)