initial commit
[glibc.git] / sysdeps / powerpc / powerpc64 / power4 / strncmp.S
1 /* Optimized strncmp implementation for PowerPC64.
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <sysdep.h>
20
21 #ifndef STRNCMP /* Keep an earlier definition of STRNCMP if there is one. */
22 # define STRNCMP strncmp /* Otherwise the entry point below is named strncmp. */
23 #endif
24
25 /* See strlen.s for comments on how the end-of-string testing works. */
26
27 /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
28
29 ENTRY_TOCLESS (STRNCMP, 4)
30 CALL_MCOUNT 3
31 /* Compares at most rN bytes of the strings at rSTR1 and rSTR2 and returns
      the result in rRTN: zero when they match, otherwise a negative or a
      positive value.  When both pointers are 8-byte aligned the whole
      doublewords are compared first and the residual bytes go through the
      byte loop.  Otherwise the byte loop handles the full count.  */
32 #define rTMP2 r0 /* scratch, used only by the little-endian end-of-string masking */
33 #define rRTN r3 /* return value, same register as rSTR1 */
34 #define rSTR1 r3 /* first string arg */
35 #define rSTR2 r4 /* second string arg */
36 #define rN r5 /* max string length */
37 #define rWORD1 r6 /* current word in s1 */
38 #define rWORD2 r7 /* current word in s2 */
39 #define rWORD3 r10 /* second s1 byte register of the unrolled byte loop, same register as rNEG */
40 #define rWORD4 r11 /* second s2 byte register of the unrolled byte loop, same register as rBITDIF */
41 #define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */
42 #define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
43 #define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
44 #define rBITDIF r11 /* bits that differ in s1 & s2 words */
45 #define rTMP r12 /* general scratch */
46 /* Alignment test, interleaved with the start of building both constants.  */
47 dcbt 0,rSTR1 /* cache touch hint for s1 */
48 or rTMP, rSTR2, rSTR1 /* merge both pointers so a single test covers the alignment of each */
49 lis r7F7F, 0x7f7f /* r7F7F = 0x7f7f0000 */
50 dcbt 0,rSTR2 /* cache touch hint for s2 */
51 clrldi. rTMP, rTMP, 61 /* keep the low 3 address bits, cr0 is eq only when both pointers are 8-byte aligned */
52 cmpldi cr1, rN, 0 /* cr1 = rN compared with 0, consumed at L(unaligned) */
53 lis rFEFE, -0x101 /* rFEFE = 0xfffffffffeff0000 */
54 bne L(unaligned) /* a pointer is misaligned: compare byte by byte */
55 /* We are doubleword aligned so set up for two loops. first a double word
56 loop, then fall into the byte loop if any residual. */
57 srdi. rTMP, rN, 3 /* rTMP = number of whole doublewords in rN, cr0 is eq when there are none */
58 clrldi rN, rN, 61 /* rN = residual byte count (low 3 bits of the original rN) */
59 addi rFEFE, rFEFE, -0x101 /* rFEFE = 0xfffffffffefefeff */
60 addi r7F7F, r7F7F, 0x7f7f /* r7F7F = 0x7f7f7f7f */
61 cmpldi cr1, rN, 0 /* cr1 = residual count compared with 0 */
62 beq L(unaligned) /* tests cr0 from the srdi. above: no whole doubleword, byte loop only */
63 /* Doubleword loop: CTR counts the whole doublewords to compare.  */
64 mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */
65 ld rWORD1, 0(rSTR1) /* first doubleword of s1 */
66 ld rWORD2, 0(rSTR2) /* first doubleword of s2 */
67 sldi rTMP, rFEFE, 32 /* low half of the constant moved into the high half */
68 insrdi r7F7F, r7F7F, 32, 0 /* r7F7F = 0x7f7f7f7f7f7f7f7f */
69 add rFEFE, rFEFE, rTMP /* rFEFE = 0xfefefefefefefeff */
70 b L(g1) /* enter the loop after its loads, the first pair is already in registers */
71
72 L(g0):
73 ldu rWORD1, 8(rSTR1) /* load the next s1 doubleword and advance rSTR1 by 8 */
74 bne- cr1, L(different) /* cr1 is from the previous pair, whose s2 half is still in rWORD2 */
75 ldu rWORD2, 8(rSTR2) /* load the next s2 doubleword and advance rSTR2 by 8 */
76 L(g1): add rTMP, rFEFE, rWORD1 /* rTMP = rWORD1 - 0x0101010101010101 */
77 nor rNEG, r7F7F, rWORD1 /* rNEG = ~(rWORD1 | 0x7f7f7f7f7f7f7f7f) */
78 bdz L(tail) /* CTR reached zero: the last whole doubleword is checked in L(tail) */
79 and. rTMP, rTMP, rNEG /* nonzero when rWORD1 contains a zero byte */
80 cmpd cr1, rWORD1, rWORD2 /* cr1 = comparison of the two doublewords */
81 beq+ L(g0) /* no zero byte in rWORD1: continue, the cr1 result is tested at L(g0) */
82
83 /* OK. We've hit the end of the string. We need to be careful that
84 we don't compare two strings as different because of gunk beyond
85 the end of the strings... */
86
87 #ifdef __LITTLE_ENDIAN__
88 L(endstring):
89 addi rTMP2, rTMP, -1 /* first half of (rTMP - 1) & ~rTMP */
90 beq cr1, L(equal) /* doublewords are identical and a terminator is present */
91 andc rTMP2, rTMP2, rTMP /* rTMP2 = ones in every bit below the lowest set bit of rTMP */
92 rldimi rTMP2, rTMP2, 1, 0 /* widen the mask by one bit so that it also covers that lowest set bit */
93 and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
94 and rWORD1, rWORD1, rTMP2
95 cmpd cr1, rWORD1, rWORD2
96 beq cr1, L(equal) /* the doublewords only differed in the masked-off part */
97 xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
98 neg rNEG, rBITDIF
99 and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
100 cntlzd rNEG, rNEG /* bitcount of the bit. */
101 andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
102 sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
103 sld rWORD2, rWORD2, rNEG
104 xor. rBITDIF, rWORD1, rWORD2 /* cr0 is lt when the top bits of the shifted doublewords differ */
105 sub rRTN, rWORD1, rWORD2
106 blt- L(highbit) /* top bits differ: the result comes from the sign of rWORD2 instead */
107 sradi rRTN, rRTN, 63 /* must return an int. */
108 ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */
109 blr
110 L(equal):
111 li rRTN, 0 /* strings compare equal */
112 blr
113 /* Doublewords differ and rSTR1 has already moved 8 bytes past them.  */
114 L(different):
115 ld rWORD1, -8(rSTR1) /* reload the s1 doubleword that pairs with rWORD2 */
116 xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
117 neg rNEG, rBITDIF
118 and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
119 cntlzd rNEG, rNEG /* bitcount of the bit. */
120 andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
121 sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
122 sld rWORD2, rWORD2, rNEG
123 xor. rBITDIF, rWORD1, rWORD2 /* cr0 is lt when the top bits of the shifted doublewords differ */
124 sub rRTN, rWORD1, rWORD2
125 blt- L(highbit) /* top bits differ: the result comes from the sign of rWORD2 instead */
126 sradi rRTN, rRTN, 63 /* sign of the difference spread over the register */
127 ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */
128 blr
129 L(highbit):
130 sradi rRTN, rWORD2, 63 /* all ones when the top bit of rWORD2 is set, else zero */
131 ori rRTN, rRTN, 1 /* rRTN = -1 when the top bit of rWORD2 is set, else 1 */
132 blr
133
134 #else
135 L(endstring):
136 and rTMP, r7F7F, rWORD1 /* low 7 bits of every s1 byte */
137 beq cr1, L(equal) /* doublewords are identical and a terminator is present */
138 add rTMP, rTMP, r7F7F /* bit 7 of a byte becomes set when its low 7 bits were nonzero */
139 xor. rBITDIF, rWORD1, rWORD2 /* differing bits, cr0 is lt when the top bits differ */
140 andc rNEG, rNEG, rTMP /* rNEG = 0x80 in exactly the bytes of rWORD1 that are zero */
141 blt- L(highbit) /* top bits differ: the result comes from the sign of rWORD2 */
142 cntlzd rBITDIF, rBITDIF /* position of the first differing bit, counted from the top */
143 cntlzd rNEG, rNEG /* position of the marker bit of the first zero byte */
144 addi rNEG, rNEG, 7 /* position of the last bit of that zero byte */
145 cmpd cr1, rNEG, rBITDIF
146 sub rRTN, rWORD1, rWORD2
147 blt- cr1, L(equal) /* the first difference lies after the terminator */
148 sradi rRTN, rRTN, 63 /* must return an int. */
149 ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */
150 blr
151 L(equal):
152 li rRTN, 0 /* strings compare equal */
153 blr
154 /* Doublewords differ and rSTR1 has already moved 8 bytes past them.  */
155 L(different):
156 ld rWORD1, -8(rSTR1) /* reload the s1 doubleword that pairs with rWORD2 */
157 xor. rBITDIF, rWORD1, rWORD2 /* cr0 is lt when the top bits differ */
158 sub rRTN, rWORD1, rWORD2
159 blt- L(highbit) /* top bits differ: the result comes from the sign of rWORD2 instead */
160 sradi rRTN, rRTN, 63 /* sign of the difference spread over the register */
161 ori rRTN, rRTN, 1 /* rRTN = -1 or 1 */
162 blr
163 L(highbit):
164 sradi rRTN, rWORD2, 63 /* all ones when the top bit of rWORD2 is set, else zero */
165 ori rRTN, rRTN, 1 /* rRTN = -1 when the top bit of rWORD2 is set, else 1 */
166 blr
167 #endif
168 /* L(tail) checks the last whole doubleword, then falls into the byte loop.  */
169 /* Oh well. In this case, we just do a byte-by-byte comparison. */
170 .align 4
171 L(tail):
172 and. rTMP, rTMP, rNEG /* same zero-byte test as in the loop, on the last whole doubleword */
173 cmpd cr1, rWORD1, rWORD2
174 bne- L(endstring) /* a zero byte is present in rWORD1 */
175 addi rSTR1, rSTR1, 8 /* step past the doubleword, L(different) reads it back at -8(rSTR1) */
176 bne- cr1, L(different)
177 addi rSTR2, rSTR2, 8
178 cmpldi cr1, rN, 0 /* cr1 = residual byte count compared with 0 */
179 L(unaligned):
180 mtctr rN /* Power4 wants mtctr 1st in dispatch group */
181 ble cr1, L(ux) /* cr1 is an unsigned comparison with 0, so this is taken only when rN is 0 */
182 L(uz):
183 lbz rWORD1, 0(rSTR1) /* first byte of s1 */
184 lbz rWORD2, 0(rSTR2) /* first byte of s2 */
185 .align 4
186 L(u1):
187 cmpdi cr1, rWORD1, 0 /* cr1 is eq when the s1 byte is the terminator */
188 bdz L(u4) /* byte count exhausted: the result is the difference of the current pair */
189 cmpd rWORD1, rWORD2 /* cr0 = comparison of the byte pair */
190 beq- cr1, L(u4) /* s1 ended */
191 bne- L(u4) /* bytes differ */
192 lbzu rWORD3, 1(rSTR1) /* next pair goes into rWORD3 and rWORD4, pointers advance by 1 */
193 lbzu rWORD4, 1(rSTR2)
194 cmpdi cr1, rWORD3, 0 /* same three checks on the rWORD3 and rWORD4 pair, exiting through L(u3) */
195 bdz L(u3)
196 cmpd rWORD3, rWORD4
197 beq- cr1, L(u3)
198 bne- L(u3)
199 lbzu rWORD1, 1(rSTR1) /* third unrolled step, back on rWORD1 and rWORD2 */
200 lbzu rWORD2, 1(rSTR2)
201 cmpdi cr1, rWORD1, 0
202 bdz L(u4)
203 cmpd rWORD1, rWORD2
204 beq- cr1, L(u4)
205 bne- L(u4)
206 lbzu rWORD3, 1(rSTR1) /* fourth unrolled step, on rWORD3 and rWORD4 */
207 lbzu rWORD4, 1(rSTR2)
208 cmpdi cr1, rWORD3, 0
209 bdz L(u3)
210 cmpd rWORD3, rWORD4
211 beq- cr1, L(u3)
212 bne- L(u3)
213 lbzu rWORD1, 1(rSTR1) /* load the pair for the next pass */
214 lbzu rWORD2, 1(rSTR2)
215 b L(u1)
216 /* Byte loop exits: the result is the difference of the last byte pair loaded.  */
217 L(u3): sub rRTN, rWORD3, rWORD4
218 blr
219 L(u4): sub rRTN, rWORD1, rWORD2
220 blr
221 L(ux):
222 li rRTN, 0 /* zero length: the strings compare equal */
223 blr
224 END (STRNCMP)
225 libc_hidden_builtin_def (strncmp)