initial commit
[glibc.git] / sysdeps / x86_64 / add_n.S
1 /* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
2 sum in a third limb vector.
3 Copyright (C) 2006-2022 Free Software Foundation, Inc.
4 This file is part of the GNU MP Library.
5
6 The GNU MP Library is free software; you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or (at your
9 option) any later version.
10
11 The GNU MP Library is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with the GNU MP Library; see the file COPYING.LIB. If not,
18 see <https://www.gnu.org/licenses/>. */
19
20 #include "sysdep.h"
21 #include "asm-syntax.h"
22
/* Symbolic names for the operand registers used by the routine below.
   NOTE(review): these are the first five System V AMD64 integer argument
   registers, so presumably the C-level call is func (rp, up, vp, n) with cy
   only meaningful for a carry-in variant -- confirm against the prototype.  */
23 #define rp %rdi /* result vector: every store in the routine goes through rp */
24 #define up %rsi /* first source vector */
25 #define vp %rdx /* second source vector */
26 #define n %rcx /* limb count; also addressed directly as %rcx/%ecx (needed for jrcxz) */
27 #define cy %r8 /* carry-in register; the routine refers to it by its raw name %r8 */
28
/* Default instantiation.  If func is already defined when this point is
   reached, neither macro is defined here, so whoever defined func must also
   have defined ADCSBB (the per-limb carry-propagating instruction).  */
29 #ifndef func
30 # define func __mpn_add_n
31 # define ADCSBB adc
32 #endif
33
/* func (default __mpn_add_n): combine the n-limb vectors at up and vp limb by
   limb with ADCSBB (adc unless overridden), store the n result limbs at rp,
   and return the final carry flag (0 or 1) in %eax.

   n must be at least 1: limb 0 of both sources is loaded unconditionally.

   Structure: a 4-way unrolled, software-pipelined loop.  Each stage loads the
   next pair of limbs before combining and storing the previous pair, with
   the live pair alternating between r10/r11 and r8/r9.  n mod 4 selects the
   stage at which the loop is entered, so no separate tail loop is needed.

   Invariants:
     - up and vp are rebased to their last limb and rp to one limb before its
       last; n is turned into a non-positive multiple of 4 used as an index
       that counts up towards zero.
     - The carry chain lives in CF between ADCSBB instructions.  Everything
       placed between them (mov, lea, jmp, jrcxz) leaves the flags untouched;
       that is why the index is advanced with lea and tested with jrcxz.
     - The loop is left at L(e01) once rcx is 0, with the final limb pair still
       pending in r10/r11.

   Registers written: rax, rcx, rdx, rsi, rdi, r8-r11, flags.  No stack use.  */
34 .text
35 ENTRY (func)
36 xor %r8, %r8 /* r8 = 0: the carry-in for this entry point */
37 mov (up), %r10 /* preload limb 0 of each source */
38 mov (vp), %r11
39
40 lea -8(up,n,8), up /* up -> last limb of first source */
41 lea -8(vp,n,8), vp /* vp -> last limb of second source */
42 lea -16(rp,n,8), rp /* rp -> one limb before the last result limb */
43 mov %ecx, %eax
44 neg n /* n = -n */
45 and $3, %eax /* eax = original n mod 4 */
46 je L(b00)
47 add %rax, n /* n = -(n rounded down to a multiple of 4), so jrcxz can detect the end */
48 cmp $2, %eax
49 jl L(b01) /* n mod 4 == 1 */
50 je L(b10) /* n mod 4 == 2 */
51
/* Entry stubs, one per residue.  The arithmetic above clobbered CF, so each
   stub reloads it by shifting bit 0 of r8 (zero here) out into CF.  The two
   stubs that enter at a stage consuming r8/r9 first move the preloaded limb
   pair there; the mov instructions do not disturb CF.  */
52 L(b11): shr %r8 /* n mod 4 == 3: CF = carry-in */
53 jmp L(e11)
54
55 L(b00): shr %r8 /* n mod 4 == 0: CF = carry-in */
56 mov %r10, %r8
57 mov %r11, %r9
58 lea 4(n), n /* pre-advance the index as the loop back-edge would; lea keeps CF */
59 jmp L(e00)
60
61 L(b01): shr %r8 /* n mod 4 == 1: CF = carry-in */
62 jmp L(e01)
63
64 L(b10): shr %r8 /* n mod 4 == 2: CF = carry-in */
65 mov %r10, %r8
66 mov %r11, %r9
67 jmp L(e10)
68
/* Exit: combine and store the pending final limb, then return the carry.  */
69 L(end): ADCSBB %r11, %r10
70 mov %r10, 8(rp) /* rp is biased by -16 bytes, so this is the last result limb */
71 mov %ecx, %eax /* eax = 0 (rcx is 0 after jrcxz); mov leaves CF intact */
72 adc %eax, %eax /* eax = 0 + 0 + CF = carry out of the final limb */
73 ret
74
75 .p2align 4
76 L(top):
77 mov -24(up,n,8), %r8 /* load next pair into r8/r9 ... */
78 mov -24(vp,n,8), %r9
79 ADCSBB %r11, %r10 /* ... then finish the pair already in r10/r11 */
80 mov %r10, -24(rp,n,8)
81 L(e00): /* entry stage for n mod 4 == 0 */
82 mov -16(up,n,8), %r10
83 mov -16(vp,n,8), %r11
84 ADCSBB %r9, %r8
85 mov %r8, -16(rp,n,8)
86 L(e11): /* entry stage for n mod 4 == 3 */
87 mov -8(up,n,8), %r8
88 mov -8(vp,n,8), %r9
89 ADCSBB %r11, %r10
90 mov %r10, -8(rp,n,8)
91 L(e10): /* entry stage for n mod 4 == 2 */
92 mov (up,n,8), %r10
93 mov (vp,n,8), %r11
94 ADCSBB %r9, %r8
95 mov %r8, (rp,n,8)
96 L(e01): /* entry stage for n mod 4 == 1; one pair is pending in r10/r11 */
97 jrcxz L(end) /* index reached 0: only the pending pair is left; tests rcx, not flags */
98 lea 4(n), n /* advance four limbs without touching CF */
99 jmp L(top)
100 END (func)