1 /* Optimized strcpy implementation for PowerPC64/POWER9.
2 Copyright (C) 2020-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
23 # define FUNC_NAME __stpcpy
25 # define FUNC_NAME STPCPY
29 # define FUNC_NAME strcpy
31 # define FUNC_NAME STRCPY
33 #endif /* !USE_AS_STPCPY */
35 /* Implements the function
37 char * [r3] strcpy (char *dest [r3], const char *src [r4])
41 char * [r3] stpcpy (char *dest [r3], const char *src [r4])
43 if USE_AS_STPCPY is defined.
45 The implementation can load bytes past a null terminator, but only
46 up to the next 16B boundary, so it never crosses a page. */
48 /* Load the quadword at addr+offset into vreg, compare each of its
   bytes with the zero bytes held in v18 (per-byte result in v6; the
   record form also updates CR6), and branch to label if any null byte
   is found.  lxv names VSX registers, so vector register vreg is
   written as vreg+32.  */
50 #define CHECK16(vreg,offset,addr,label) \
51 lxv vreg+32,offset(addr); \
52 vcmpequb. v6,vreg,v18; \
56 ENTRY_TOCLESS (FUNC_NAME, 4)
59 vspltisb v18,0 /* Zeroes in v18 */
60 vspltisb v19,-1 /* 0xFF bytes in v19 */
62 /* Next 16B-aligned address. Prepare address for L(loop). */
68 /* Align data and fill bytes not loaded with non matching char. */
73 vcmpequb. v6,v0,v18 /* v6 byte is 0xff where v0 holds a null byte, 0x00 otherwise */
76 /* There's a null byte: store only up to and including it.  */
77 vctzlsbb r8,v6 /* Number of trailing zero bytes in the v6 mask */
78 addi r9,r8,1 /* Store length: those bytes plus the null byte.  */
79 sldi r10,r9,56 /* stxvl wants size in top 8 bits. */
80 stxvl 32+v0,r3,r10 /* Partial store of r9 bytes from v0 at r3 */
83 /* stpcpy returns the dest address plus the size not counting the
90 sldi r10,r8,56 /* stxvl wants size in top 8 bits */
91 stxvl 32+v0,r3,r10 /* Partial store */
/* Test six consecutive quadwords at r5+0 through r5+80, loading them
   into v0-v5; each CHECK16 names the tail label to branch to when its
   quadword contains a null byte.  */
95 CHECK16(v0,0,r5,tail1)
96 CHECK16(v1,16,r5,tail2)
97 CHECK16(v2,32,r5,tail3)
98 CHECK16(v3,48,r5,tail4)
99 CHECK16(v4,64,r5,tail5)
100 CHECK16(v5,80,r5,tail6)
116 vctzlsbb r8,v6 /* Number of trailing zero bytes in the v6 mask */
117 addi r9,r8,1 /* Store length: those bytes plus the null terminator */
118 sldi r9,r9,56 /* stxvl wants size in top 8 bits */
119 stxvl 32+v0,r11,r9 /* Partial store at r11 (set outside this excerpt; presumably the dest address for this tail - confirm) */
121 /* stpcpy returns the dest address plus the size not counting the
203 #ifndef USE_AS_STPCPY
204 libc_hidden_builtin_def (strcpy)