kernel/arch/x86/lib/memset_64.S

   1 /* Copyright 2002 Andi Kleen, SuSE Labs */
   2
   3 #include <linux/linkage.h>
   4 #include <asm/dwarf2.h>
   5 #include <asm/cpufeature.h>
   6 #include <asm/alternative-asm.h>
   7
   8 .weak memset
   9
  10 /*
  11  * ISO C memset - set a memory block to a byte value. This function uses fast
  12  * string to get better performance than the original function. The code is
  13  * simpler and shorter than the original function as well.
  14  *
  15  * rdi   destination
  16  * rsi   value (char)
  17  * rdx   count (bytes)
  18  *
  19  * rax   original destination
  20  */
  21 ENTRY(memset)
  22 ENTRY(__memset)
  23         /*
  24          * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature; jump to
  25          * memset_erms when it is available. If it is not, but fast string
  26          * operations are (REP_GOOD), fall through to the rep stosq code below.
  27          * Otherwise, use the original memset function (memset_orig).
  28          */
  29         ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
  30                       "jmp memset_erms", X86_FEATURE_ERMS
  31
  32         movq %rdi,%r9                   /* save dst; rep stos advances rdi */
  33         movq %rdx,%rcx                  /* rcx = count */
  34         andl $7,%edx                    /* edx = count % 8 (trailing bytes) */
  35         shrq $3,%rcx                    /* rcx = count / 8 (whole qwords) */
  36         /* expand byte value  */
  37         movzbl %sil,%esi                /* zero-extend the fill byte */
  38         movabs $0x0101010101010101,%rax
  39         imulq %rsi,%rax                 /* rax = fill byte copied into all 8 bytes */
  40         rep stosq                       /* store rcx qwords of rax at (rdi) */
  41         movl %edx,%ecx                  /* rcx = remaining 0..7 bytes */
  42         rep stosb                       /* store the tail one byte (al) at a time */
  43         movq %r9,%rax                   /* return the original destination */
  44         ret
  45 ENDPROC(memset)
  46 ENDPROC(__memset)
  47
  48 /*
  49  * ISO C memset - set a memory block to a byte value. This function uses
  50  * enhanced rep stosb to override the fast string function.
  51  * The code is simpler and shorter than the fast string function as well.
  52  *
  53  * rdi   destination
  54  * rsi   value (char)
  55  * rdx   count (bytes)
  56  *
  57  * rax   original destination
  58  */
  59 ENTRY(memset_erms)
  60         movq %rdi,%r9                   /* save dst; rep stosb advances rdi */
  61         movb %sil,%al                   /* al = fill byte */
  62         movq %rdx,%rcx                  /* rcx = byte count for the rep prefix */
  63         rep stosb                       /* store al at (rdi), rcx times */
  64         movq %r9,%rax                   /* return the original destination */
  65         ret
  66 ENDPROC(memset_erms)
  67
  68 ENTRY(memset_orig)                      /* in: rdi=dst, sil=byte, rdx=count; out: rax=dst */
  69         CFI_STARTPROC
  70         movq %rdi,%r10                  /* save dst for the return value */
  71
  72         /* expand byte value  */
  73         movzbl %sil,%ecx                /* zero-extend the fill byte */
  74         movabs $0x0101010101010101,%rax
  75         imulq  %rcx,%rax                /* rax = fill byte copied into all 8 bytes */
  76
  77         /* align dst */
  78         movl  %edi,%r9d
  79         andl  $7,%r9d                   /* r9d = dst & 7 (offset past an 8-byte boundary) */
  80         jnz  .Lbad_alignment            /* non-zero: dst is not 8-byte aligned */
  81         CFI_REMEMBER_STATE
  82 .Lafter_bad_alignment:
  83
  84         movq  %rdx,%rcx
  85         shrq  $6,%rcx                   /* rcx = count / 64 (whole 64-byte chunks) */
  86         jz       .Lhandle_tail          /* fewer than 64 bytes: skip the big loop */
  87
  88         .p2align 4
  89 .Lloop_64:                              /* eight qword stores = 64 bytes per iteration */
  90         decq  %rcx                      /* sets ZF for the jnz below; mov/lea leave flags alone */
  91         movq  %rax,(%rdi)
  92         movq  %rax,8(%rdi)
  93         movq  %rax,16(%rdi)
  94         movq  %rax,24(%rdi)
  95         movq  %rax,32(%rdi)
  96         movq  %rax,40(%rdi)
  97         movq  %rax,48(%rdi)
  98         movq  %rax,56(%rdi)
  99         leaq  64(%rdi),%rdi             /* advance dst without touching flags */
 100         jnz    .Lloop_64
 101
 102         /* Handle tail in loops. The loops should be faster than hard
 103            to predict jump tables. */
 104         .p2align 4
 105 .Lhandle_tail:
 106         movl    %edx,%ecx
 107         andl    $63&(~7),%ecx           /* ecx = count & 56: bytes in leftover whole qwords */
 108         jz              .Lhandle_7      /* no whole qword left */
 109         shrl    $3,%ecx                 /* ecx = number of leftover qwords (1..7) */
 110         .p2align 4
 111 .Lloop_8:                               /* one qword store per iteration */
 112         decl   %ecx                     /* sets ZF for the jnz below */
 113         movq  %rax,(%rdi)
 114         leaq  8(%rdi),%rdi              /* advance dst without touching flags */
 115         jnz    .Lloop_8
 116
 117 .Lhandle_7:
 118         andl    $7,%edx                 /* edx = count & 7: final 0..7 bytes */
 119         jz      .Lende                  /* nothing left to store */
 120         .p2align 4
 121 .Lloop_1:                               /* one byte store per iteration */
 122         decl    %edx                    /* sets ZF for the jnz below */
 123         movb    %al,(%rdi)
 124         leaq    1(%rdi),%rdi            /* advance dst without touching flags */
 125         jnz     .Lloop_1
 126
 127 .Lende:
 128         movq    %r10,%rax               /* return the original destination */
 129         ret
 130
 131         CFI_RESTORE_STATE
 132 .Lbad_alignment:                        /* reached with r9 = dst & 7, non-zero */
 133         cmpq $7,%rdx
 134         jbe     .Lhandle_7              /* count <= 7 (unsigned): byte loop only */
 135         movq %rax,(%rdi)        /* unaligned store; count >= 8 here, so all 8 bytes are in range */
 136         movq $8,%r8
 137         subq %r9,%r8                    /* r8 = 8 - (dst & 7): bytes up to the next 8-byte boundary */
 138         addq %r8,%rdi                   /* dst is now 8-byte aligned */
 139         subq %r8,%rdx                   /* count -= bytes skipped; they were filled by the store above */
 140         jmp .Lafter_bad_alignment
 141 .Lfinal:                                /* not referenced anywhere in the visible source */
 142         CFI_ENDPROC
 143 ENDPROC(memset_orig)