#ifndef X86_BITS_STRING_H
#define X86_BITS_STRING_H

/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

FILE_LICENCE ( GPL2_OR_LATER );

/** @file
 *
 * Optimised string operations
 *
 */

#define __HAVE_ARCH_MEMCPY

extern void * __memcpy ( void *dest, const void *src, size_t len );
extern void * __memcpy_reverse ( void *dest, const void *src, size_t len );

/**
 * Copy memory area (where length is a compile-time constant)
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length of memory area
 * @ret dest		Destination address
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
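        /* The __may_alias__ attribute on the unions below allows
         * dest and src to be accessed as 8-, 16- or 32-bit chunks
         * without violating gcc's strict-aliasing rules.
         */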
        union {
                uint32_t u32[2];
                uint16_t u16[4];
                uint8_t u8[8];
        } __attribute__ (( __may_alias__ )) *dest_u = dest;
        const union {
                uint32_t u32[2];
                uint16_t u16[4];
                uint8_t u8[8];
        } __attribute__ (( __may_alias__ )) *src_u = src;
        const void *esi;
        void *edi;

        switch ( len ) {
        case 0 : /* 0 bytes */
                return dest;
        /*
         * Single-register moves; these are always better than a
         * string operation.  We can clobber an arbitrary two
         * registers (data, source, dest can re-use source register)
         * instead of being restricted to esi and edi.  There's also a
         * much greater potential for optimising with nearby code.
         */
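        /* (Illustrative; actual code generation is compiler- and
         * context-dependent: for "case 4" below, gcc is free to emit
         * something like "movl (%eax),%ecx ; movl %ecx,(%edx)" using
         * whichever two registers happen to be free, where a string
         * operation would be pinned to esi and edi.)
         */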
        case 1 : /* 4 bytes */
                dest_u->u8[0] = src_u->u8[0];
                return dest;
        case 2 : /* 6 bytes */
                dest_u->u16[0] = src_u->u16[0];
                return dest;
        case 4 : /* 4 bytes */
                dest_u->u32[0] = src_u->u32[0];
                return dest;
        /*
         * Double-register moves; these are probably still a win.
         */
        case 3 : /* 12 bytes */
                dest_u->u16[0] = src_u->u16[0];
                dest_u->u8[2] = src_u->u8[2];
                return dest;
        case 5 : /* 10 bytes */
                dest_u->u32[0] = src_u->u32[0];
                dest_u->u8[4] = src_u->u8[4];
                return dest;
        case 6 : /* 12 bytes */
                dest_u->u32[0] = src_u->u32[0];
                dest_u->u16[2] = src_u->u16[2];
                return dest;
        case 8 : /* 10 bytes */
                dest_u->u32[0] = src_u->u32[0];
                dest_u->u32[1] = src_u->u32[1];
                return dest;
        }

        /* Even if we have to load up esi and edi ready for a string
         * operation, we can sometimes save space by using multiple
         * single-byte "movs" operations instead of loading up ecx and
         * using "rep movsb".
         *
         * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
         * to allow for saving/restoring ecx 50% of the time.
         *
         * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
         * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
         * but "movsl" moves twice as much data, so it balances out).
         *
         * The cutoff point therefore occurs around 26 bytes; the byte
         * requirements for each method are:
         *
         * len             16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
         * #bytes (ecx)     8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
         * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
         */
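        /* Worked example: len = 27 decomposes as 6 x "movsl" +
         * 1 x "movsw" + 1 x "movsb" = 6 + 2 + 1 = 9 bytes, exceeding
         * the 8 bytes of the ecx method, whereas len = 25 needs only
         * 6 + 0 + 1 = 7 bytes and so still wins without ecx.
         */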

        esi = src;
        edi = dest;

        if ( len >= 26 )
                return __memcpy ( dest, src, len );
        if ( len >= 6*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 5*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 4*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 3*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 2*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( len >= 1*4 )
                __asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( ( len % 4 ) >= 2 )
                __asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );
        if ( ( len % 2 ) >= 1 )
                __asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
                                       : "0" ( edi ), "1" ( esi ) : "memory" );

        return dest;
}

/**
 * Copy memory area
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length of memory area
 * @ret dest		Destination address
 */
static inline __attribute__ (( always_inline )) void *
memcpy ( void *dest, const void *src, size_t len ) {
        if ( __builtin_constant_p ( len ) ) {
                return __constant_memcpy ( dest, src, len );
        } else {
                return __memcpy ( dest, src, len );
        }
}
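
/* Usage sketch (illustrative): a constant-length call such as
 *
 *	uint8_t mac[6], copy[6];
 *	memcpy ( copy, mac, sizeof ( copy ) );
 *
 * resolves at compile time to case 6 of __constant_memcpy(): one
 * 32-bit move plus one 16-bit move, with no function call overhead.
 */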

#define __HAVE_ARCH_MEMMOVE

extern void * __memmove ( void *dest, const void *src, size_t len );

/**
 * Copy (possibly overlapping) memory area
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length of memory area
 * @ret dest		Destination address
 */
static inline __attribute__ (( always_inline )) void *
memmove ( void *dest, const void *src, size_t len ) {
        ssize_t offset = ( dest - src );

        if ( __builtin_constant_p ( offset ) ) {
                /* Constant offset: copy direction is known at
                 * compile time */
                if ( offset <= 0 ) {
                        /* Destination lies at or before source: a
                         * forwards copy is always safe */
                        return memcpy ( dest, src, len );
                } else {
                        /* Destination lies after source: copy
                         * backwards to handle overlap */
                        return __memcpy_reverse ( dest, src, len );
                }
        } else {
                /* Offset unknown at compile time */
                return __memmove ( dest, src, len );
        }
}
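
/* Illustrative sketch: with a constant offset the direction test is
 * resolved entirely at compile time:
 *
 *	char buf[16];
 *	memmove ( buf, buf + 4, 12 );	// offset -4 <= 0: forwards
 *					// copy via memcpy() is safe
 *	memmove ( buf + 4, buf, 12 );	// offset +4 > 0: overlap
 *					// requires __memcpy_reverse()
 */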

#define __HAVE_ARCH_MEMSET

/**
 * Fill memory region
 *
 * @v dest		Destination address
 * @v fill		Fill pattern
 * @v len		Length of region
 * @ret dest		Destination address
 */
static inline void * memset ( void *dest, int fill, size_t len ) {
        void *discard_D;
        size_t discard_c;

        __asm__ __volatile__ ( "rep stosb"
                               : "=&D" ( discard_D ), "=&c" ( discard_c )
                               : "0" ( dest ), "1" ( len ), "a" ( fill )
                               : "memory" );
        return dest;
}
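
/* "rep stosb" stores the low byte of fill (in al) len times,
 * incrementing edi after each store; e.g. memset ( buf, 0xff, 16 )
 * executes with al = 0xff, ecx = 16 and edi = buf.
 */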

#define __HAVE_ARCH_MEMSWAP

extern void * memswap ( void *dest, void *src, size_t len );

#define __HAVE_ARCH_STRNCMP

extern int strncmp ( const char *str1, const char *str2, size_t len );

#define __HAVE_ARCH_STRLEN

extern size_t strlen ( const char *string );

#endif /* X86_BITS_STRING_H */