From 80980e29fb2249ab5c57c7b8d158800318490978 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Tue, 26 Jan 2021 21:26:27 -0800 Subject: [PATCH] working on code --- .gitignore | 3 +- Makefile | 11 +- compile_flags.txt | 1 + include/simplev_cpp.h | 315 +++++++++++++++++++++++++++++++++--- tests/test_add/expected.s | 90 +++++++++++ tests/test_add/test.cpp | 24 +++ tests/test_include/test.cpp | 3 + tests/test_include_c/test.c | 3 + tests/test_setvl/expected.s | 6 +- tests/test_setvl/test.cpp | 7 +- tests/test_sub/expected.s | 90 +++++++++++ tests/test_sub/test.cpp | 25 +++ 12 files changed, 542 insertions(+), 36 deletions(-) create mode 100644 tests/test_add/expected.s create mode 100644 tests/test_add/test.cpp create mode 100644 tests/test_sub/expected.s create mode 100644 tests/test_sub/test.cpp diff --git a/.gitignore b/.gitignore index 7f07300..9b867a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: LGPL-2.1-or-later # See Notices.txt for copyright information -\.cache \ No newline at end of file +/.cache +/.vscode diff --git a/Makefile b/Makefile index 80e96b6..30d97c4 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CC = powerpc64le-linux-gnu-gcc CXX = powerpc64le-linux-gnu-g++ -CFLAGS = -O3 -Iinclude -g0 -mno-altivec -mno-vsx +CFLAGS = -O3 -Iinclude -g0 -mno-altivec -mno-vsx -Wall CXXFLAGS = -std=gnu++17 all: tests @@ -14,20 +14,21 @@ TESTS_DIR := $(dir $(TESTS_SOURCE)) TESTS_BUILD_DIR := $(addprefix build/,$(TESTS_DIR)) TESTS_DIFF := $(addsuffix diff.txt,$(TESTS_BUILD_DIR)) TESTS_FILTERED_OUT := $(addsuffix filtered-out.s,$(TESTS_BUILD_DIR)) +EXTRA_DEPS := Makefile $(wildcard include/*.h) build/tests/%/: mkdir -p $@ -build/tests/%/out.s: tests/%/test.cpp Makefile | build/tests/%/ +build/tests/%/out.s: tests/%/test.cpp $(EXTRA_DEPS) | build/tests/%/ $(CXX) -S $(CFLAGS) $(CXXFLAGS) $< -o $@ -build/tests/%/out.s: tests/%/test.c Makefile | build/tests/%/ +build/tests/%/out.s: tests/%/test.c $(EXTRA_DEPS) | build/tests/%/ $(CC) -S $(CFLAGS) $< -o $@ -build/tests/%/filtered-out.s: build/tests/%/out.s Makefile +build/tests/%/filtered-out.s: build/tests/%/out.s $(EXTRA_DEPS) sed 's/\(^\t.ident\t"\).*"/\1GCC"/' < $< > $@ -build/tests/%/diff.txt: tests/%/expected.s build/tests/%/filtered-out.s Makefile +build/tests/%/diff.txt: tests/%/expected.s build/tests/%/filtered-out.s $(EXTRA_DEPS) diff -u $< build/$(dir $<)filtered-out.s > $@ || true .PRECIOUS: build/tests/%/out.s build/tests/%/filtered-out.s build/tests/%/ diff --git a/compile_flags.txt b/compile_flags.txt index 7d483f5..18b5a51 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -15,6 +15,7 @@ /usr/powerpc64le-linux-gnu/include -isystem /usr/include +-Wall -O3 -Iinclude -g0 diff --git a/include/simplev_cpp.h b/include/simplev_cpp.h index 297f71b..23941a2 100644 --- a/include/simplev_cpp.h +++ b/include/simplev_cpp.h @@ -2,6 +2,8 @@ // See Notices.txt for copyright information #pragma once +#include +#include #ifndef __cplusplus #error to use SimpleV Cpp with C, include "simplev_c.h" #endif @@ -10,21 +12,31 @@ #include #include -template -struct SVVecTypeStruct; +// we need to use the register keyword as part of assigning particular variables to registers for +// inline assembly +#define DECLARE_ASM_REG(type, name, reg, value) \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wregister\"") register type name asm(reg); \ + _Pragma("GCC diagnostic pop") name = value; -template -using SVVecType = typename SVVecTypeStruct::Type; +namespace sv +{ +template +struct VecTypeStruct; + +template +using VecType = typename VecTypeStruct::Type; -#define MAKE_VEC_TYPE(size) \ - template \ - struct SVVecTypeStruct> \ - final \ - { \ - typedef ElementType Type __attribute__((vector_size(size))); \ +#define MAKE_VEC_TYPE(size) \ + template \ + struct VecTypeStruct> \ + final \ + { \ + typedef ElementType Type __attribute__((vector_size(size))); \ }; MAKE_VEC_TYPE(1) @@ -38,10 +50,12 @@ MAKE_VEC_TYPE(8) #undef MAKE_VEC_TYPE -template -struct SVVec final +template +struct Vec final { - using Type = SVVecType; + static_assert(MAX_VL > 0 && MAX_VL <= 64); + static_assert(SUB_VL >= 1 && SUB_VL <= 4); + using Type = VecType; Type value; }; @@ -53,11 +67,10 @@ struct Mask final template struct VL final { + static_assert(MAX_VL > 0 && MAX_VL <= 64); std::size_t value = MAX_VL; }; -namespace opcodes -{ inline constexpr std::size_t PRIMARY_OPCODE_SHIFT = 32 - 6; /// unofficial value. see https://libre-soc.org/openpower/sv/setvl/ /// FIXME: incorrect extended opcode value @@ -66,7 +79,181 @@ inline constexpr std::size_t SETVL_IMMEDIATE_SHIFT = 32 - 7 - 16; inline constexpr std::size_t REG_FIELD_WIDTH = 5; inline constexpr std::size_t XL_FORM_RT_SHIFT = 32 - REG_FIELD_WIDTH - 6; inline constexpr std::size_t XL_FORM_RA_SHIFT = 32 - REG_FIELD_WIDTH - 11; -} // namespace opcodes + +constexpr std::uint32_t encode_sv_prefix(std::uint32_t remapped_bits24) +{ + std::uint32_t expanded26bits = (remapped_bits24 & 0x3FFFFFUL) + | ((remapped_bits24 & 0x400000UL) << 1) + | ((remapped_bits24 & 0x800000UL) << 2); + expanded26bits |= 0x01400000UL; // set 2 constant-1 bits + return expanded26bits | (1UL << PRIMARY_OPCODE_SHIFT); +} + +enum class MaskMode : std::uint32_t +{ + Int = 0, + CR = 1, +}; + +enum class MaskField : std::uint32_t +{ + Always = 0, + OneShlR3 = 1, // 1 << R3 + R3 = 2, + NotR3 = 3, + R10 = 4, + NotR10 = 5, + R30 = 6, + NotR30 = 7, + + Lt = 0, + NL = 1, + Gt = 2, + NG = 3, + Eq = 4, + NE = 5, + SO = 6, + NS = 7, +}; + +enum class Mode : std::uint32_t +{ + Normal = 0, + // TODO: fill out +}; + +enum class ElementWidth : std::uint32_t +{ + I8 = 3, + I16 = 2, + I32 = 1, + I64 = 0, + Default = 0, + F64 = 0, + F32 = 1, + F16 = 2, + BF16 = 3, +}; + +template > +struct ElementProperties; + +template +struct ElementProperties final +{ + static inline constexpr ElementWidth element_width = ElementWidth::I8; +}; + +template +struct ElementProperties final +{ + static inline constexpr ElementWidth element_width = ElementWidth::I16; +}; + +template +struct ElementProperties final +{ + static inline constexpr ElementWidth element_width = ElementWidth::I32; +}; + +template +struct ElementProperties final +{ + static inline constexpr ElementWidth element_width = ElementWidth::I64; +}; + +template <> +struct ElementProperties final +{ + static inline constexpr ElementWidth element_width = ElementWidth::F32; +}; + +template <> +struct ElementProperties final +{ + static inline constexpr ElementWidth element_width = ElementWidth::F64; +}; + +template +inline constexpr ElementWidth element_width_for = ElementProperties::element_width; + +template +constexpr std::uint32_t encode_sv_prefix(MaskMode mask_mode, + MaskField mask_field, + ElementWidth elwidth, + ElementWidth elwidth_src, + Mode mode, + std::uint32_t remapped_bits24) +{ + static_assert(SUB_VL >= 1 && SUB_VL <= 4); + remapped_bits24 |= static_cast(mask_mode) << (23 - 0); + remapped_bits24 |= static_cast(mask_field) << (23 - 3); + remapped_bits24 |= static_cast(elwidth) << (23 - 5); + remapped_bits24 |= static_cast(elwidth_src) << (23 - 7); + remapped_bits24 |= static_cast(SUB_VL - 1) << (23 - 9); + remapped_bits24 |= static_cast(mode); + return remapped_bits24; +} + +enum class RegExtra2 : std::uint32_t +{ + Scalar0 = 0, + Scalar1 = 1, + Vector0 = 2, + Vector1 = 3, +}; + +enum class RegExtra3 : std::uint32_t +{ + Scalar0 = 0, + Scalar1 = 1, + Scalar2 = 2, + Scalar3 = 3, + Vector0 = 4, + Vector1 = 5, + Vector2 = 6, + Vector3 = 7, +}; + +template +constexpr std::uint32_t encode_sv_prefix_rm_1p_3s1d(MaskMode mask_mode, + MaskField mask_field, + ElementWidth elwidth, + ElementWidth elwidth_src, + Mode mode, + RegExtra2 rdest_extra2, + RegExtra2 rsrc1_extra2, + RegExtra2 rsrc2_extra2, + RegExtra2 rsrc3_extra2) +{ + std::uint32_t remapped_bits24 = 0; + remapped_bits24 |= static_cast(rdest_extra2) << (23 - 11); + remapped_bits24 |= static_cast(rsrc1_extra2) << (23 - 13); + remapped_bits24 |= static_cast(rsrc2_extra2) << (23 - 15); + remapped_bits24 |= static_cast(rsrc3_extra2) << (23 - 17); + return encode_sv_prefix( + mask_mode, mask_field, elwidth, elwidth_src, mode, remapped_bits24); +} + +template +constexpr std::uint32_t encode_sv_prefix_rm_1p_2s1d(MaskMode mask_mode, + MaskField mask_field, + ElementWidth elwidth, + ElementWidth elwidth_src, + Mode mode, + RegExtra3 rdest_extra3, + RegExtra3 rsrc1_extra3, + RegExtra3 rsrc2_extra3) +{ + std::uint32_t remapped_bits24 = 0; + remapped_bits24 |= static_cast(rdest_extra3) << (23 - 12); + remapped_bits24 |= static_cast(rsrc1_extra3) << (23 - 15); + remapped_bits24 |= static_cast(rsrc2_extra3) << (23 - 18); + return encode_sv_prefix( + mask_mode, mask_field, elwidth, elwidth_src, mode, remapped_bits24); +} #define SETVL_ASM(retval, vl) \ "# setvl " retval ", " vl \ @@ -74,19 +261,97 @@ inline constexpr std::size_t XL_FORM_RA_SHIFT = 32 - REG_FIELD_WIDTH - 11; ".long %[setvl_opcode] | (" retval " << %[xl_form_rt_shift]) | (" vl \ " << %[xl_form_ra_shift]) | ((%[max_vl] - 1) << %[setvl_immediate_shift])" -#define SETVL_ASM_INPUT_ARGS() \ - [setvl_opcode] "n"(opcodes::SETVL_OPCODE), \ - [setvl_immediate_shift] "n"(opcodes::SETVL_IMMEDIATE_SHIFT), \ - [xl_form_rt_shift] "n"(opcodes::XL_FORM_RT_SHIFT), \ - [xl_form_ra_shift] "n"(opcodes::XL_FORM_RA_SHIFT) +#define SETVL_ASM_INPUT_ARGS() \ + [setvl_opcode] "n"(sv::SETVL_OPCODE), [setvl_immediate_shift] "n"(sv::SETVL_IMMEDIATE_SHIFT), \ + [xl_form_rt_shift] "n"(sv::XL_FORM_RT_SHIFT), [xl_form_ra_shift] "n"(sv::XL_FORM_RA_SHIFT) template -inline __attribute__((always_inline)) VL sv_setvl(std::size_t vl) +inline __attribute__((always_inline)) VL setvl(std::size_t vl) { - static_assert(MAX_VL > 0 && MAX_VL < 64); VL retval; asm(SETVL_ASM("%[retval]", "%[vl]") : [retval] "=b"(retval.value) : [vl] "b"(vl), [max_vl] "n"(MAX_VL), SETVL_ASM_INPUT_ARGS()); return retval; } + +template >> +inline __attribute__((always_inline)) Vec add( + Vec ra, + Vec rb, + VL vl = VL(), + Mask mask = Mask()) +{ + constexpr std::uint32_t prefix = + encode_sv_prefix_rm_1p_2s1d(MaskMode::Int, + MaskField::R10, + element_width_for, + element_width_for, + Mode::Normal, + RegExtra3::Vector0, + RegExtra3::Vector0, + RegExtra3::Vector0); + Vec retval; + DECLARE_ASM_REG(std::uint64_t, mask_r10, "r10", mask.value) + asm(SETVL_ASM("0", "%[vl]") "\n\t" + "# sv.add ew=%[el_width], subvl=%[sub_vl], m=%[mask], %[retval].v, %[ra].v, %[rb].v\n\t" + ".long %[prefix]\n\t" + "add %[retval], %[ra], %[rb]" + : [retval] "=&b"(retval.value) + : [vl] "b"(vl), + [max_vl] "n"(MAX_VL), + [sub_vl] "n"(SUB_VL), + [mask] "b"(mask_r10), + [ra] "b"(ra.value), + [rb] "b"(rb.value), + [el_width] "n"(8 * sizeof(ElementType)), + [prefix] "n"(prefix), + SETVL_ASM_INPUT_ARGS()); + return retval; +} + +template >> +inline __attribute__((always_inline)) Vec sub( + Vec rb, // intentionally reversed since we use sv.subf instruction + Vec ra, + VL vl = VL(), + Mask mask = Mask()) +{ + constexpr std::uint32_t prefix = + encode_sv_prefix_rm_1p_2s1d(MaskMode::Int, + MaskField::R10, + element_width_for, + element_width_for, + Mode::Normal, + RegExtra3::Vector0, + RegExtra3::Vector0, + RegExtra3::Vector0); + Vec retval; + DECLARE_ASM_REG(std::uint64_t, mask_r10, "r10", mask.value) + asm(SETVL_ASM("0", "%[vl]") "\n\t" + "# sv.subf ew=%[el_width], subvl=%[sub_vl], m=%[mask], %[retval].v, %[ra].v, %[rb].v\n\t" + ".long %[prefix]\n\t" + "subf %[retval], %[ra], %[rb]" + : [retval] "=&b"(retval.value) + : [vl] "b"(vl), + [max_vl] "n"(MAX_VL), + [sub_vl] "n"(SUB_VL), + [mask] "b"(mask_r10), + [ra] "b"(ra.value), + [rb] "b"(rb.value), + [el_width] "n"(8 * sizeof(ElementType)), + [prefix] "n"(prefix), + SETVL_ASM_INPUT_ARGS()); + return retval; +} + +#undef SETVL_ASM +#undef SETVL_ASM_INPUT_ARGS +#undef DECLARE_ASM_REG +} // namespace sv diff --git a/tests/test_add/expected.s b/tests/test_add/expected.s new file mode 100644 index 0000000..15e9cd2 --- /dev/null +++ b/tests/test_add/expected.s @@ -0,0 +1,90 @@ + .file "test.cpp" + .abiversion 2 + .section ".text" + .align 2 + .p2align 4,,15 + .globl _Z10test_add_1N2sv3VecIhLm1ELm4EEES1_ + .type _Z10test_add_1N2sv3VecIhLm1ELm4EEES1_, @function +_Z10test_add_1N2sv3VecIhLm1ELm4EEES1_: +.LFB27: + .cfi_startproc + li 9,4 + li 10,-1 +#APP + # 312 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.add ew=8, subvl=1, m=10, 8.v, 3.v, 4.v + .long 5186688 + add 8, 3, 4 + # 0 "" 2 +#NO_APP + mr 3,8 + blr + .long 0 + .byte 0,9,0,0,0,0,0,0 + .cfi_endproc +.LFE27: + .size _Z10test_add_1N2sv3VecIhLm1ELm4EEES1_,.-_Z10test_add_1N2sv3VecIhLm1ELm4EEES1_ + .align 2 + .p2align 4,,15 + .globl _Z10test_add_2N2sv3VecItLm1ELm4EEES1_ + .type _Z10test_add_2N2sv3VecItLm1ELm4EEES1_, @function +_Z10test_add_2N2sv3VecItLm1ELm4EEES1_: +.LFB34: + .cfi_startproc + li 9,4 + li 10,-1 +#APP + # 312 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.add ew=16, subvl=1, m=10, 8.v, 3.v, 4.v + .long 4859008 + add 8, 3, 4 + # 0 "" 2 +#NO_APP + mr 3,8 + blr + .long 0 + .byte 0,9,0,0,0,0,0,0 + .cfi_endproc +.LFE34: + .size _Z10test_add_2N2sv3VecItLm1ELm4EEES1_,.-_Z10test_add_2N2sv3VecItLm1ELm4EEES1_ + .align 2 + .p2align 4,,15 + .globl _Z10test_add_3N2sv3VecItLm1ELm4EEES1_S1_ + .type _Z10test_add_3N2sv3VecItLm1ELm4EEES1_S1_, @function +_Z10test_add_3N2sv3VecItLm1ELm4EEES1_S1_: +.LFB35: + .cfi_startproc + li 9,4 + li 10,-1 +#APP + # 312 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.add ew=16, subvl=1, m=10, 8.v, 4.v, 5.v + .long 4859008 + add 8, 4, 5 + # 0 "" 2 +#NO_APP + mr 5,8 +#APP + # 312 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.add ew=16, subvl=1, m=10, 8.v, 3.v, 5.v + .long 4859008 + add 8, 3, 5 + # 0 "" 2 +#NO_APP + mr 3,8 + blr + .long 0 + .byte 0,9,0,0,0,0,0,0 + .cfi_endproc +.LFE35: + .size _Z10test_add_3N2sv3VecItLm1ELm4EEES1_S1_,.-_Z10test_add_3N2sv3VecItLm1ELm4EEES1_S1_ + .ident "GCC" + .section .note.GNU-stack,"",@progbits diff --git a/tests/test_add/test.cpp b/tests/test_add/test.cpp new file mode 100644 index 0000000..68adce0 --- /dev/null +++ b/tests/test_add/test.cpp @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// See Notices.txt for copyright information +#include +#include + +#include "simplev_cpp.h" + +using u8x4 = sv::Vec; +using u16x4 = sv::Vec; + +u8x4 test_add_1(u8x4 a, u8x4 b) +{ + return sv::add(a, b); +} + +u16x4 test_add_2(u16x4 a, u16x4 b) +{ + return sv::add(a, b); +} + +u16x4 test_add_3(u16x4 a, u16x4 b, u16x4 c) +{ + return sv::add(a, sv::add(b, c)); +} \ No newline at end of file diff --git a/tests/test_include/test.cpp b/tests/test_include/test.cpp index 5aeb04c..b7328ed 100644 --- a/tests/test_include/test.cpp +++ b/tests/test_include/test.cpp @@ -1,2 +1,5 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// See Notices.txt for copyright information + // test just including file #include "simplev_cpp.h" diff --git a/tests/test_include_c/test.c b/tests/test_include_c/test.c index 01ff6eb..1e97e4f 100644 --- a/tests/test_include_c/test.c +++ b/tests/test_include_c/test.c @@ -1,2 +1,5 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// See Notices.txt for copyright information + // test just including file #include "simplev_c.h" diff --git a/tests/test_setvl/expected.s b/tests/test_setvl/expected.s index a376ebb..a9d1764 100644 --- a/tests/test_setvl/expected.s +++ b/tests/test_setvl/expected.s @@ -6,10 +6,10 @@ .globl _Z10test_setvlm .type _Z10test_setvlm, @function _Z10test_setvlm: -.LFB21: +.LFB27: .cfi_startproc #APP - # 90 "include/simplev_cpp.h" 1 + # 274 "include/simplev_cpp.h" 1 # setvl 3, 3, MVL=8 .long 1275068416 | (3 << 21) | (3 << 16) | ((8 - 1) << 9) # 0 "" 2 @@ -18,7 +18,7 @@ _Z10test_setvlm: .long 0 .byte 0,9,0,0,0,0,0,0 .cfi_endproc -.LFE21: +.LFE27: .size _Z10test_setvlm,.-_Z10test_setvlm .ident "GCC" .section .note.GNU-stack,"",@progbits diff --git a/tests/test_setvl/test.cpp b/tests/test_setvl/test.cpp index a6df168..473a2cb 100644 --- a/tests/test_setvl/test.cpp +++ b/tests/test_setvl/test.cpp @@ -1,9 +1,12 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// See Notices.txt for copyright information + #include #include #include "simplev_cpp.h" -VL<8> test_setvl(std::size_t v) +sv::VL<8> test_setvl(std::size_t v) { - return sv_setvl<8>(v); + return sv::setvl<8>(v); } \ No newline at end of file diff --git a/tests/test_sub/expected.s b/tests/test_sub/expected.s new file mode 100644 index 0000000..e4d457e --- /dev/null +++ b/tests/test_sub/expected.s @@ -0,0 +1,90 @@ + .file "test.cpp" + .abiversion 2 + .section ".text" + .align 2 + .p2align 4,,15 + .globl _Z10test_sub_1N2sv3VecIhLm1ELm4EEES1_ + .type _Z10test_sub_1N2sv3VecIhLm1ELm4EEES1_, @function +_Z10test_sub_1N2sv3VecIhLm1ELm4EEES1_: +.LFB27: + .cfi_startproc + li 9,4 + li 10,-1 +#APP + # 350 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.subf ew=8, subvl=1, m=10, 8.v, 4.v, 3.v + .long 5186688 + subf 8, 4, 3 + # 0 "" 2 +#NO_APP + mr 3,8 + blr + .long 0 + .byte 0,9,0,0,0,0,0,0 + .cfi_endproc +.LFE27: + .size _Z10test_sub_1N2sv3VecIhLm1ELm4EEES1_,.-_Z10test_sub_1N2sv3VecIhLm1ELm4EEES1_ + .align 2 + .p2align 4,,15 + .globl _Z10test_sub_2N2sv3VecItLm1ELm4EEES1_ + .type _Z10test_sub_2N2sv3VecItLm1ELm4EEES1_, @function +_Z10test_sub_2N2sv3VecItLm1ELm4EEES1_: +.LFB34: + .cfi_startproc + li 9,4 + li 10,-1 +#APP + # 350 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.subf ew=16, subvl=1, m=10, 8.v, 4.v, 3.v + .long 4859008 + subf 8, 4, 3 + # 0 "" 2 +#NO_APP + mr 3,8 + blr + .long 0 + .byte 0,9,0,0,0,0,0,0 + .cfi_endproc +.LFE34: + .size _Z10test_sub_2N2sv3VecItLm1ELm4EEES1_,.-_Z10test_sub_2N2sv3VecItLm1ELm4EEES1_ + .align 2 + .p2align 4,,15 + .globl _Z10test_sub_3N2sv3VecItLm1ELm4EEES1_S1_ + .type _Z10test_sub_3N2sv3VecItLm1ELm4EEES1_S1_, @function +_Z10test_sub_3N2sv3VecItLm1ELm4EEES1_S1_: +.LFB35: + .cfi_startproc + li 9,4 + li 10,-1 +#APP + # 350 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.subf ew=16, subvl=1, m=10, 8.v, 5.v, 4.v + .long 4859008 + subf 8, 5, 4 + # 0 "" 2 +#NO_APP + mr 5,8 +#APP + # 350 "include/simplev_cpp.h" 1 + # setvl 0, 9, MVL=4 + .long 1275068416 | (0 << 21) | (9 << 16) | ((4 - 1) << 9) + # sv.subf ew=16, subvl=1, m=10, 8.v, 5.v, 3.v + .long 4859008 + subf 8, 5, 3 + # 0 "" 2 +#NO_APP + mr 3,8 + blr + .long 0 + .byte 0,9,0,0,0,0,0,0 + .cfi_endproc +.LFE35: + .size _Z10test_sub_3N2sv3VecItLm1ELm4EEES1_S1_,.-_Z10test_sub_3N2sv3VecItLm1ELm4EEES1_S1_ + .ident "GCC" + .section .note.GNU-stack,"",@progbits diff --git a/tests/test_sub/test.cpp b/tests/test_sub/test.cpp new file mode 100644 index 0000000..06ad9d8 --- /dev/null +++ b/tests/test_sub/test.cpp @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// See Notices.txt for copyright information + +#include +#include + +#include "simplev_cpp.h" + +using u8x4 = sv::Vec; +using u16x4 = sv::Vec; + +u8x4 test_sub_1(u8x4 a, u8x4 b) +{ + return sv::sub(a, b); +} + +u16x4 test_sub_2(u16x4 a, u16x4 b) +{ + return sv::sub(a, b); +} + +u16x4 test_sub_3(u16x4 a, u16x4 b, u16x4 c) +{ + return sv::sub(a, sv::sub(b, c)); +} \ No newline at end of file -- 2.30.2