kernel/arch/metag/lib/memcpy.S

   1 !   Copyright (C) 2008-2012 Imagination Technologies Ltd.
   2
   3         .text
   4         .global _memcpy
   5         .type   _memcpy,function
   6 ! _memcpy: copy D1Ar3 (cnt) bytes from D0Ar2 (src) to D1Ar1 (dst).
   7 ! In:  D1Ar1 = dst, D0Ar2 = src, D1Ar3 = cnt
   8 ! Out: D0Re0 = dst (the original destination pointer, kept in A0.3)
   9 ! Also written: A0.2, A0.3, A1.2, D0Ar4, D1Ar5, D0Ar6, D0FrT, D1Re0, TXRPT and the three argument registers
  10 _memcpy:
  11         CMP     D1Ar3, #16              ! flags are consumed by the BGE below
  12         MOV     A1.2, D0Ar2             ! A1.2 = running source pointer
  13         MOV     A0.2, D1Ar1             ! A0.2 = running destination pointer
  14         MOV     A0.3, D1Ar1             ! A0.3 = original dst, kept for the return value
  15 ! If there are fewer than 16 bytes to copy use the byte copy loop
  16         BGE     $Llong_copy
  17
  18 $Lbyte_copy:
  19 ! Simply copy a byte at a time. Expects D1Ar3 = bytes left, A1.2 / A0.2 = src / dst.
  20         SUBS    TXRPT, D1Ar3, #1        ! repeat count = bytes - 1
  21         BLT     $Lend                   ! skip the loop when bytes - 1 is negative (e.g. cnt == 0)
  22 $Lloop_byte:
  23         GETB    D1Re0, [A1.2++]         ! load one byte, post-increment the source pointer
  24         SETB    [A0.2++], D1Re0         ! store it, post-increment the destination pointer
  25         BR      $Lloop_byte             ! loop back using the TXRPT count set above (assumed hardware repeat - confirm in ISA manual)
  26
  27 $Lend:
  28 ! Finally set return value and return
  29         MOV     D0Re0, A0.3             ! return value = original dst saved at entry
  30         MOV     PC, D1RtP               ! return: jump to the address held in D1RtP
  31
  32 $Llong_copy:
  33         ANDS    D1Ar5, D1Ar1, #7        ! D1Ar5 = dst & 7; test destination alignment
  34         BZ      $Laligned_dst
  35
  36 ! The destination address is not 8 byte aligned. We will copy bytes from
  37 ! the source to the destination until the remaining data has an 8 byte
  38 ! destination address alignment (i.e we should never copy more than 7
  39 ! bytes here).
  40 $Lalign_dst:
  41         GETB    D0Re0, [A1.2++]         ! load one source byte
  42         ADD     D1Ar5, D1Ar5, #1        ! dest is aligned when D1Ar5 reaches #8
  43         SUB     D1Ar3, D1Ar3, #1        ! decrement count of remaining bytes
  44         SETB    [A0.2++], D0Re0         ! store the byte to the destination
  45         CMP     D1Ar5, #8
  46         BNE     $Lalign_dst
  47
  48 ! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
  49 ! blocks, then jump to the unaligned copy loop or fall through to the aligned
  50 ! copy loop as appropriate.
  51 $Laligned_dst:
  52         MOV     D0Ar4, A1.2             ! D0Ar4 = current source address
  53         LSR     D1Ar5, D1Ar3, #3        ! D1Ar5 = number of 8 byte blocks
  54         ANDS    D0Ar4, D0Ar4, #7        ! test source alignment
  55         BNZ     $Lunaligned_copy        ! if unaligned, use unaligned copy loop
  56
  57 ! Both source and destination are 8 byte aligned - the easy case.
  58 $Laligned_copy:
  59         LSRS    D1Ar5, D1Ar3, #5        ! D1Ar5 = number of 32 byte blocks
  60         BZ      $Lbyte_copy             ! fewer than 32 bytes left: the byte loop copies them all
  61         SUB     TXRPT, D1Ar5, #1        ! repeat count = 32 byte blocks - 1
  62
  63 $Laligned_32:                           ! one iteration moves 32 bytes as four GETL/SETL transfers
  64         GETL    D0Re0, D1Re0, [A1.2++]
  65         GETL    D0Ar6, D1Ar5, [A1.2++]
  66         SETL    [A0.2++], D0Re0, D1Re0
  67         SETL    [A0.2++], D0Ar6, D1Ar5
  68         GETL    D0Re0, D1Re0, [A1.2++]
  69         GETL    D0Ar6, D1Ar5, [A1.2++]
  70         SETL    [A0.2++], D0Re0, D1Re0
  71         SETL    [A0.2++], D0Ar6, D1Ar5
  72         BR      $Laligned_32            ! repeat under the TXRPT count set above
  73
  74 ! If there are any remaining bytes use the byte copy loop, otherwise we are done
  75         ANDS    D1Ar3, D1Ar3, #0x1f     ! D1Ar3 = bytes left over after the 32 byte blocks
  76         BNZ     $Lbyte_copy
  77         B       $Lend
  78
  79 ! The destination is 8 byte aligned but the source is not, and there are 8
  80 ! or more bytes to be copied.
  81 $Lunaligned_copy:
  82 ! Adjust the source pointer (A1.2) to the 8 byte boundary before its
  83 ! current value
  84         MOV     D0Ar4, A1.2             ! D0Ar4 becomes the aligned-down address
  85         MOV     D0Ar6, A1.2             ! D0Ar6 keeps the unaligned address
  86         ANDMB   D0Ar4, D0Ar4, #0xfff8   ! clear the low 3 address bits (exact ANDMB semantics not shown here - confirm)
  87         MOV     A1.2, D0Ar4             ! A1.2 = aligned source address
  88 ! Save the number of bytes of mis-alignment in D0Ar4 for use later
  89         SUBS    D0Ar6, D0Ar6, D0Ar4     ! D0Ar6 = mis-alignment in bytes
  90         MOV     D0Ar4, D0Ar6            ! saved copy, re-added to A1.2 at $Lunaligned_end
  91 ! if there is no mis-alignment after all, use the aligned copy loop
  92         BZ      $Laligned_copy          ! not expected from $Laligned_dst, which only branches here when (src & 7) != 0
  93
  94 ! prefetch 8 bytes
  95         GETL    D0Re0, D1Re0, [A1.2]    ! no increment here: the loops below pre-increment with [++A1.2]
  96
  97         SUB     TXRPT, D1Ar5, #1        ! repeat count = 8 byte blocks (from $Laligned_dst) - 1
  98
  99 ! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
 100 ! 4 bytes, and more than 4 bytes.
 101         CMP     D0Ar6, #4
 102         BLT     $Lunaligned_1_2_3       ! use 1-3 byte mis-alignment loop
 103         BZ      $Lunaligned_4           ! use 4 byte mis-alignment loop
 104
 105 ! The mis-alignment is more than 4 bytes
 106 $Lunaligned_5_6_7:
 107         SUB     D0Ar6, D0Ar6, #4        ! byte offset within the second prefetched word (1..3)
 108 ! Calculate the bit offsets required for the shift operations necessary
 109 ! to align the data.
 110 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
 111         MULW    D0Ar6, D0Ar6, #8        ! bytes -> bits
 112         MOV     D1Ar5, #32
 113         SUB     D1Ar5, D1Ar5, D0Ar6
 114 ! Move data 4 bytes before we enter the main loop
 115         MOV     D0Re0, D1Re0            ! carried word = second word of the prefetched pair
 116
 117 $Lloop_5_6_7:
 118         GETL    D0Ar2, D1Ar1, [++A1.2]  ! pre-increment, then load the next 8 source bytes
 119 ! form 64-bit data in D0Re0, D1Re0
 120         LSR     D0Re0, D0Re0, D0Ar6     ! carried word shifted down by the bit offset
 121         MOV     D1Re0, D0Ar2
 122         LSL     D1Re0, D1Re0, D1Ar5     ! first new word shifted up by (32 - bit offset)
 123         ADD     D0Re0, D0Re0, D1Re0     ! first output word (shifted fields do not overlap, so ADD merges them)
 124
 125         LSR     D0Ar2, D0Ar2, D0Ar6     ! remainder of the first new word
 126         LSL     D1Re0, D1Ar1, D1Ar5     ! second new word shifted up
 127         ADD     D1Re0, D1Re0, D0Ar2     ! second output word
 128
 129         SETL    [A0.2++], D0Re0, D1Re0  ! store 8 bytes to the aligned destination
 130         MOV     D0Re0, D1Ar1            ! carry the second new word into the next iteration
 131         BR      $Lloop_5_6_7            ! repeat under the TXRPT count set above
 132
 133         B       $Lunaligned_end         ! reached once the repeat loop has finished
 134
 135 $Lunaligned_1_2_3:
 136 ! Calculate the bit offsets required for the shift operations necessary
 137 ! to align the data.
 138 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
 139         MULW    D0Ar6, D0Ar6, #8        ! bytes -> bits
 140         MOV     D1Ar5, #32
 141         SUB     D1Ar5, D1Ar5, D0Ar6
 142
 143 $Lloop_1_2_3:
 144 ! form 64-bit data in D0Re0,D1Re0
 145         LSR     D0Re0, D0Re0, D0Ar6     ! first current word shifted down by the bit offset
 146         LSL     D1Ar1, D1Re0, D1Ar5     ! second current word shifted up (D1Ar1 is a temporary here)
 147         ADD     D0Re0, D0Re0, D1Ar1     ! first output word
 148         MOV     D0Ar2, D1Re0
 149         LSR     D0FrT, D0Ar2, D0Ar6     ! D0FrT = remainder of the second current word
 150         GETL    D0Ar2, D1Ar1, [++A1.2]  ! pre-increment, then load the next 8 source bytes
 151
 152         MOV     D1Re0, D0Ar2
 153         LSL     D1Re0, D1Re0, D1Ar5     ! first new word shifted up by (32 - bit offset)
 154         ADD     D1Re0, D1Re0, D0FrT     ! second output word
 155
 156         SETL    [A0.2++], D0Re0, D1Re0  ! store 8 bytes to the aligned destination
 157         MOV     D0Re0, D0Ar2            ! the new pair becomes the current pair
 158         MOV     D1Re0, D1Ar1
 159         BR      $Lloop_1_2_3            ! repeat under the TXRPT count set above
 160
 161         B       $Lunaligned_end         ! reached once the repeat loop has finished
 162
 163 ! The 4 byte mis-alignment case - this does not require any shifting, just a
 164 ! shuffling of registers.
 165 $Lunaligned_4:
 166         MOV     D0Re0, D1Re0            ! carried word = second word of the prefetched pair
 167 $Lloop_4:
 168         GETL    D0Ar2, D1Ar1, [++A1.2]  ! pre-increment, then load the next 8 source bytes
 169         MOV     D1Re0, D0Ar2            ! second output word = first new word
 170         SETL    [A0.2++], D0Re0, D1Re0  ! store 8 bytes to the aligned destination
 171         MOV     D0Re0, D1Ar1            ! carry the second new word into the next iteration
 172         BR      $Lloop_4                ! repeat under TXRPT, then fall into $Lunaligned_end
 173
 174 $Lunaligned_end:
 175 ! If there are no remaining bytes to copy, we are done.
 176         ANDS    D1Ar3, D1Ar3, #7        ! D1Ar3 = bytes left over after the 8 byte blocks
 177         BZ      $Lend
 178 ! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
 179 ! address of the remaining bytes, and branch to the byte copy loop.
 180         MOV     D0Ar6, A1.2             ! aligned address of the last block loaded
 181         ADD     D1Ar5, D0Ar4, D0Ar6     ! add back the mis-alignment saved in D0Ar4
 182         MOV     A1.2, D1Ar5             ! A1.2 = address of the next source byte to copy
 183         B       $Lbyte_copy
 184
 185         .size _memcpy,.-_memcpy