1 /* memchr optimized with SSE2.
2 Copyright (C) 2017-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <isa-level.h>
22 /* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
23 so we need this to build for ISA V2 builds. */
24 #if ISA_SHOULD_BUILD (2)
27 # define MEMCHR __memchr_sse2
29 # ifdef USE_AS_WMEMCHR
30 # define PCMPEQ pcmpeqd
31 # define CHAR_PER_VEC 4
33 # define PCMPEQ pcmpeqb
34 # define CHAR_PER_VEC 16
37 /* Fast SSE2 version using pmaxub and a 64-byte loop. */
45 /* Clear the upper 32 bits. */
48 # ifdef USE_AS_WMEMCHR
52 punpcklbw %xmm1, %xmm1
55 punpcklbw %xmm1, %xmm1
59 pshufd $0, %xmm1, %xmm1
70 sub $CHAR_PER_VEC, %rdx
75 # ifdef USE_AS_WMEMCHR
79 sub $(CHAR_PER_VEC * 4), %rdx
90 /* Check if there is a match. */
92 /* Remove the leading bytes. */
95 je L(unaligned_no_match)
96 /* Check which byte is a match. */
98 # ifdef USE_AS_WMEMCHR
111 L(unaligned_no_match):
112 /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
113 "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
114 possible addition overflow. */
117 # ifdef USE_AS_WMEMCHR
123 sub $(CHAR_PER_VEC * 4), %rdx
134 movdqa 16(%rdi), %xmm2
140 movdqa 32(%rdi), %xmm3
146 movdqa 48(%rdi), %xmm4
156 sub $(CHAR_PER_VEC * 4), %rdx
165 movdqa 16(%rdi), %xmm2
171 movdqa 32(%rdi), %xmm3
177 movdqa 48(%rdi), %xmm3
188 # ifdef USE_AS_WMEMCHR
195 sub $(CHAR_PER_VEC * 4), %rdx
198 movdqa 16(%rdi), %xmm2
199 movdqa 32(%rdi), %xmm3
200 movdqa 48(%rdi), %xmm4
227 movdqa 32(%rdi), %xmm3
230 PCMPEQ 48(%rdi), %xmm1
237 lea 48(%rdi, %rax), %rax
242 add $(CHAR_PER_VEC * 2), %edx
251 movdqa 16(%rdi), %xmm2
257 movdqa 32(%rdi), %xmm3
262 sub $CHAR_PER_VEC, %edx
265 PCMPEQ 48(%rdi), %xmm1
274 add $(CHAR_PER_VEC * 2), %edx
280 sub $CHAR_PER_VEC, %edx
283 PCMPEQ 16(%rdi), %xmm1
293 lea -16(%rax, %rdi), %rax
305 lea 16(%rax, %rdi), %rax
311 lea 32(%rax, %rdi), %rax
317 # ifdef USE_AS_WMEMCHR
331 # ifdef USE_AS_WMEMCHR
339 lea 16(%rdi, %rax), %rax
345 # ifdef USE_AS_WMEMCHR
353 lea 32(%rdi, %rax), %rax
359 # ifdef USE_AS_WMEMCHR
367 lea 48(%rdi, %rax), %rax