aarch64: Avoid false dependencies for SVE unary operations
author     Richard Sandiford <richard.sandiford@arm.com>
           Wed, 25 Nov 2020 16:14:20 +0000 (16:14 +0000)
committer  Richard Sandiford <richard.sandiford@arm.com>
           Wed, 25 Nov 2020 16:14:20 +0000 (16:14 +0000)
For calls like:

        z0 = svabs_s8_x (p0, z1)

we previously generated:

        abs     z0.b, p0/m, z1.b

However, this creates a false dependency on z0 (the merge input).
This can lead to surprising performance problems in some cases,
e.g. serialising the operation behind whatever instruction last
wrote z0, or preventing two iterations of a loop from being
executed in parallel.

This patch therefore ties the input to the output, using a MOVPRFX
if necessary and possible.  (The SVE2 unary long instructions do
not support MOVPRFX.)
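
For the svabs_s8_x example above, the untied case is now expected
to produce:

        movprfx z0, z1
        abs     z0.b, p0/m, z1.b

The MOVPRFX copies z1 into z0 first, so the merging ABS no longer
depends on the previous contents of z0; the updated *_x_untied tests
below check for this sequence.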

When testing the patch, I hit a bug in the big-endian SVE move
optimisation in aarch64_maybe_expand_sve_subreg_move.  I don't
have an independent testcase for it, so I didn't split it out
into a separate patch.

gcc/
* config/aarch64/aarch64.c (aarch64_maybe_expand_sve_subreg_move):
Do not optimize LRA subregs.
* config/aarch64/aarch64-sve.md
(@aarch64_pred_<SVE_INT_UNARY:optab><mode>): Tie the input to the
output.
(@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>): Likewise.
(*<ANY_EXTEND:optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2): Likewise.
(@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>): Likewise.
(*cnot<mode>): Likewise.
(@aarch64_pred_<SVE_COND_FP_UNARY:optab><mode>): Likewise.
(@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>):
Likewise.
(@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>):
Likewise.
(@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>):
Likewise.
(@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>):
Likewise.
(@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>):
Likewise.
(@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>):
Likewise.
(@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>):
Likewise.
* config/aarch64/aarch64-sve2.md
(@aarch64_pred_<SVE2_COND_FP_UNARY_LONG:sve_fp_op><mode>): Likewise.
(@aarch64_pred_<SVE2_COND_FP_UNARY_NARROWB:sve_fp_op><mode>): Likewise.
(@aarch64_pred_<SVE2_U32_UNARY:sve_int_op><mode>): Likewise.
(@aarch64_pred_<SVE2_COND_INT_UNARY_FP:sve_fp_op><mode>): Likewise.

gcc/testsuite/
* gcc.target/aarch64/sve/acle/asm/abs_f16.c (abs_f16_x_untied): Expect
a MOVPRFX instruction.
* gcc.target/aarch64/sve/acle/asm/abs_f32.c (abs_f32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/abs_f64.c (abs_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/abs_s16.c (abs_s16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/abs_s32.c (abs_s32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/abs_s64.c (abs_s64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/abs_s8.c (abs_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cls_s16.c (cls_s16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cls_s32.c (cls_s32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cls_s64.c (cls_s64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cls_s8.c (cls_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_s16.c (clz_s16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_s32.c (clz_s32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_s64.c (clz_s64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_s8.c (clz_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_u16.c (clz_u16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_u32.c (clz_u32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_u64.c (clz_u64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/clz_u8.c (clz_u8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_s16.c (cnot_s16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_s32.c (cnot_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_s64.c (cnot_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_s8.c (cnot_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_u16.c (cnot_u16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_u32.c (cnot_u32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_u64.c (cnot_u64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnot_u8.c (cnot_u8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_bf16.c (cnt_bf16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_f16.c (cnt_f16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_f32.c (cnt_f32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_f64.c (cnt_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_s16.c (cnt_s16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_s32.c (cnt_s32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_s64.c (cnt_s64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_s8.c (cnt_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_u16.c (cnt_u16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_u32.c (cnt_u32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_u64.c (cnt_u64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cnt_u8.c (cnt_u8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_bf16.c (cvt_bf16_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_f16.c (cvt_f16_f32_x_untied)
(cvt_f16_f64_x_untied, cvt_f16_s16_x_untied, cvt_f16_s32_x_untied)
(cvt_f16_s64_x_untied, cvt_f16_u16_x_untied, cvt_f16_u32_x_untied)
(cvt_f16_u64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_f32.c (cvt_f32_f16_x_untied)
(cvt_f32_f64_x_untied, cvt_f32_s16_x_untied, cvt_f32_s32_x_untied)
(cvt_f32_s64_x_untied, cvt_f32_u16_x_untied, cvt_f32_u32_x_untied)
(cvt_f32_u64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_f64.c (cvt_f64_f16_x_untied)
(cvt_f64_f32_x_untied, cvt_f64_s16_x_untied, cvt_f64_s32_x_untied)
(cvt_f64_s64_x_untied, cvt_f64_u16_x_untied, cvt_f64_u32_x_untied)
(cvt_f64_u64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_s16.c (cvt_s16_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_s32.c (cvt_s32_f16_x_untied)
(cvt_s32_f32_x_untied, cvt_s32_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_s64.c (cvt_s64_f16_x_untied)
(cvt_s64_f32_x_untied, cvt_s64_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_u16.c (cvt_u16_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_u32.c (cvt_u32_f16_x_untied)
(cvt_u32_f32_x_untied, cvt_u32_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/cvt_u64.c (cvt_u64_f16_x_untied)
(cvt_u64_f32_x_untied, cvt_u64_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/extb_s16.c (extb_s16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/extb_s32.c (extb_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/extb_s64.c (extb_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/exth_s32.c (exth_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/exth_s64.c (exth_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/extw_s64.c (extw_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_f16.c (neg_f16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_f32.c (neg_f32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_f64.c (neg_f64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_s16.c (neg_s16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_s32.c (neg_s32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_s64.c (neg_s64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/neg_s8.c (neg_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_s16.c (not_s16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_s32.c (not_s32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_s64.c (not_s64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_s8.c (not_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_u16.c (not_u16_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_u32.c (not_u32_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_u64.c (not_u64_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/not_u8.c (not_u8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_s16.c (rbit_s16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_s32.c (rbit_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_s64.c (rbit_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_s8.c (rbit_s8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_u16.c (rbit_u16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_u32.c (rbit_u32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_u64.c (rbit_u64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rbit_u8.c (rbit_u8_x_untied): Ditto.
* gcc.target/aarch64/sve/acle/asm/recpx_f16.c (recpx_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/recpx_f32.c (recpx_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/recpx_f64.c (recpx_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revb_s16.c (revb_s16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revb_s32.c (revb_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revb_s64.c (revb_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revb_u16.c (revb_u16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revb_u32.c (revb_u32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revb_u64.c (revb_u64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revh_s32.c (revh_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revh_s64.c (revh_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revh_u32.c (revh_u32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revh_u64.c (revh_u64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revw_s64.c (revw_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/revw_u64.c (revw_u64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rinta_f16.c (rinta_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rinta_f32.c (rinta_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rinta_f64.c (rinta_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rinti_f16.c (rinti_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rinti_f32.c (rinti_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rinti_f64.c (rinti_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintm_f16.c (rintm_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintm_f32.c (rintm_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintm_f64.c (rintm_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintn_f16.c (rintn_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintn_f32.c (rintn_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintn_f64.c (rintn_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintp_f16.c (rintp_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintp_f32.c (rintp_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintp_f64.c (rintp_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintx_f16.c (rintx_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintx_f32.c (rintx_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintx_f64.c (rintx_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintz_f16.c (rintz_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintz_f32.c (rintz_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/rintz_f64.c (rintz_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/sqrt_f16.c (sqrt_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/sqrt_f32.c (sqrt_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve/acle/asm/sqrt_f64.c (sqrt_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/cvtx_f32.c (cvtx_f32_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/logb_f16.c (logb_f16_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/logb_f32.c (logb_f32_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/logb_f64.c (logb_f64_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qabs_s16.c (qabs_s16_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qabs_s32.c (qabs_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qabs_s64.c (qabs_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qabs_s8.c (qabs_s8_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qneg_s16.c (qneg_s16_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qneg_s32.c (qneg_s32_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qneg_s64.c (qneg_s64_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/qneg_s8.c (qneg_s8_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/recpe_u32.c (recpe_u32_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/rsqrte_u32.c (rsqrte_u32_x_untied):
Ditto.
* gcc.target/aarch64/sve2/acle/asm/cvtlt_f32.c
(cvtlt_f32_f16_x_untied): Expect a MOV instruction.
* gcc.target/aarch64/sve2/acle/asm/cvtlt_f64.c
(cvtlt_f64_f32_x_untied): Likewise.

136 files changed:
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64-sve2.md
gcc/config/aarch64/aarch64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtx_f32.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/logb_f16.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/logb_f32.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/logb_f64.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qabs_s16.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qabs_s32.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qabs_s64.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qabs_s8.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qneg_s16.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qneg_s32.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qneg_s64.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qneg_s8.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/recpe_u32.c
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rsqrte_u32.c

index 455b025521f21f1cc1b0731ca612acfdb087de88..6359c40bdecda6c126bd70bef66561dd1da44dc9 100644 (file)
 
 ;; Integer unary arithmetic predicated with a PTRUE.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_UNARY:SVE_I
-            (match_operand:SVE_I 2 "register_operand" "w"))]
+            (match_operand:SVE_I 2 "register_operand" "0, w"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
-  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated integer unary arithmetic with merging.
 
 ;; Predicated integer unary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
-            [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
+            [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")]
             SVE_INT_UNARY)]
          UNSPEC_PRED_X))]
   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
-  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
 ;; of lanes and the data mode decides the granularity of the reversal within
 ;; each lane.
 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
-  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
        (unspec:SVE_ALL
-         [(match_operand:PRED_HSD 1 "register_operand" "Upl")
+         [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl")
           (unspec:SVE_ALL
-            [(match_operand:SVE_ALL 2 "register_operand" "w")]
+            [(match_operand:SVE_ALL 2 "register_operand" "0, w")]
             UNSPEC_REVBHW)]
          UNSPEC_PRED_X))]
   "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
-  "rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>"
+  "@
+   rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
+   movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated integer unary operations with merging.
 
 ;; Predicated sign and zero extension from a narrower mode.
 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
-  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_HSDI
-         [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
           (ANY_EXTEND:SVE_HSDI
-            (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
+            (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
-  "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
+  "@
+   <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+   movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated truncate-and-sign-extend operations.
 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
-  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
-         [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
           (sign_extend:SVE_FULL_HSDI
             (truncate:SVE_PARTIAL_I
-              (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
+              (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))]
          UNSPEC_PRED_X))]
   "TARGET_SVE
    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
-  "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
+  "@
+   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
+   movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated truncate-and-sign-extend operations with merging.
 )
 
 (define_insn "*cnot<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(unspec:<VPRED>
-            [(match_operand:<VPRED> 1 "register_operand" "Upl")
+            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (eq:<VPRED>
-               (match_operand:SVE_FULL_I 2 "register_operand" "w")
+               (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
                (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
             UNSPEC_PRED_Z)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
           (match_dup 3)]
          UNSPEC_SEL))]
   "TARGET_SVE"
-  "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  "@
+   cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
+   movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated logical inverse with merging.
 
 ;; Predicated floating-point unary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+          (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
          SVE_COND_FP_UNARY))]
   "TARGET_SVE"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  "@
+   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated floating-point unary arithmetic with merging.
 
 ;; Predicated float-to-integer conversion, either to the same width or wider.
 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
-  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
-         [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+          (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
          SVE_COND_FCVTI))]
   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
-  "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
+  "@
+   fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
+   movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated narrowing float-to-integer conversion.
 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
-  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
+  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SI_ONLY
-         [(match_operand:VNx2BI 1 "register_operand" "Upl")
+         [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
+          (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")]
          SVE_COND_FCVTI))]
   "TARGET_SVE"
-  "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
+  "@
+   fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+   movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated float-to-integer conversion with merging, either to the same
 ;; Predicated integer-to-float conversion, either to the same width or
 ;; narrower.
 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
-         [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
+          (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")]
          SVE_COND_ICVTF))]
   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
-  "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
+  "@
+   <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
+   movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated widening integer-to-float conversion.
 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
-  [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
+  [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx2DF_ONLY
-         [(match_operand:VNx2BI 1 "register_operand" "Upl")
+         [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
+          (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
          SVE_COND_ICVTF))]
   "TARGET_SVE"
-  "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
+  "@
+   <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
+   movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated integer-to-float conversion with merging, either to the same
 
 ;; Predicated float-to-float truncation.
 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
-  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSF
-         [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
+          (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")]
          SVE_COND_FCVT))]
   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
-  "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
+  "@
+   fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
+   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated float-to-float truncation with merging.
 
 ;; Predicated BFCVT.
 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
-  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx8BF_ONLY
-         [(match_operand:VNx4BI 1 "register_operand" "Upl")
+         [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
+          (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
          SVE_COND_FCVT))]
   "TARGET_SVE_BF16"
-  "bfcvt\t%0.h, %1/m, %2.s"
+  "@
+   bfcvt\t%0.h, %1/m, %2.s
+   movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated BFCVT with merging.
 
 ;; Predicated float-to-float extension.
 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
-  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_SDF
-         [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
+          (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
          SVE_COND_FCVT))]
   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
-  "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
+  "@
+   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
+   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated float-to-float extension with merging.
index 12dc9aaac554c2b4e3d1a76d4298f61e27c53724..772c35079c9441448534471fba4dba622322b8fc 100644 (file)
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:<VNARROW> 2 "register_operand" "w")]
+          (match_operand:<VNARROW> 2 "register_operand" "0")]
          SVE2_COND_FP_UNARY_LONG))]
   "TARGET_SVE2"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
 )
 
 ;; Predicated convert long top with merging.
 ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
 ;; it supports MOVPRFX).
 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
-  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w")
+  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
-         [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+         [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:<VWIDE> 2 "register_operand" "w")]
+          (match_operand:<VWIDE> 2 "register_operand" "0, w")]
          SVE2_COND_FP_UNARY_NARROWB))]
   "TARGET_SVE2"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+  "@
+   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
+   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated FCVTX with merging.
 
 ;; Predicated integer unary operations.
 (define_insn "@aarch64_pred_<sve_int_op><mode>"
-  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
+  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SI_ONLY
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:VNx4SI_ONLY
-            [(match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
+            [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
             SVE2_U32_UNARY)]
          UNSPEC_PRED_X))]
   "TARGET_SVE2"
-  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated integer unary operations with merging.
 
 ;; Predicated FLOGB.
 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
-  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
+  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w, ?&w")
        (unspec:<V_INT_EQUIV>
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+          (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
          SVE2_COND_INT_UNARY_FP))]
   "TARGET_SVE2"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  "@
+   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
+   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated FLOGB with merging.
index 67c25878779c96657f37b1d342407deeaec9250c..3189dfb90f93706034634a3fe686a10961607d0c 100644 (file)
@@ -5390,9 +5390,35 @@ bool
 aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
 {
   gcc_assert (BYTES_BIG_ENDIAN);
-  if (SUBREG_P (dest))
+
+  /* Do not try to optimize subregs that LRA has created for matched
+     reloads.  These subregs only exist as a temporary measure to make
+     the RTL well-formed, but they are exempt from the usual
+     TARGET_CAN_CHANGE_MODE_CLASS rules.
+
+     For example, if we have:
+
+       (set (reg:VNx8HI R1) (foo:VNx8HI (reg:VNx4SI R2)))
+
+     and the constraints require R1 and R2 to be in the same register,
+     LRA may need to create RTL such as:
+
+       (set (subreg:VNx4SI (reg:VNx8HI TMP) 0) (reg:VNx4SI R2))
+       (set (reg:VNx8HI TMP) (foo:VNx8HI (subreg:VNx4SI (reg:VNx8HI TMP) 0)))
+       (set (reg:VNx8HI R1) (reg:VNx8HI TMP))
+
+     which forces both the input and output of the original instruction
+     to use the same hard register.  But for this to work, the normal
+     rules have to be suppressed on the subreg input, otherwise LRA
+     would need to reload that input too, meaning that the process
+     would never terminate.  To compensate for this, the normal rules
+     are also suppressed for the subreg output of the first move.
+     Ignoring the special case and handling the first move normally
+     would therefore generate wrong code: we would reverse the elements
+     for the first subreg but not reverse them back for the second subreg.  */
+  if (SUBREG_P (dest) && !LRA_SUBREG_P (dest))
     dest = SUBREG_REG (dest);
-  if (SUBREG_P (src))
+  if (SUBREG_P (src) && !LRA_SUBREG_P (src))
     src = SUBREG_REG (src);
 
   /* The optimization handles two single SVE REGs with different element
index 2aa8736e645a58b2570ada42b3cbb29fb3b8b8c2..09605324cd2a0e3c40d22fd34ad666d0bd98cf2c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_f16_x_tied1, svfloat16_t,
 
 /*
 ** abs_f16_x_untied:
+**     movprfx z0, z1
 **     fabs    z0\.h, p0/m, z1\.h
 **     ret
 */
index 30286afc7b7d0083a3a444c2bd4d6b90b1a93442..797a4187af941858a7cd603d10c3f5bac9b67715 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_f32_x_tied1, svfloat32_t,
 
 /*
 ** abs_f32_x_untied:
+**     movprfx z0, z1
 **     fabs    z0\.s, p0/m, z1\.s
 **     ret
 */
index 28ef9fbba23bcf405d0543ffee70b41246e23cd5..4290ac390d1bc4b5a542ac536b9705cb25b2156f 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_f64_x_tied1, svfloat64_t,
 
 /*
 ** abs_f64_x_untied:
+**     movprfx z0, z1
 **     fabs    z0\.d, p0/m, z1\.d
 **     ret
 */
index 3b16a9c4f0380625e7a7155785d558053ad7ba1a..fcd5c3413bfdc6274bff006d42f0e869f9750f47 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_s16_x_tied1, svint16_t,
 
 /*
 ** abs_s16_x_untied:
+**     movprfx z0, z1
 **     abs     z0\.h, p0/m, z1\.h
 **     ret
 */
index 14bcbd50c46f86c7fdd4e811d0240dfb4b5e814e..58d183ed9403060f474fc2e970e1008767cd609b 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_s32_x_tied1, svint32_t,
 
 /*
 ** abs_s32_x_untied:
+**     movprfx z0, z1
 **     abs     z0\.s, p0/m, z1\.s
 **     ret
 */
index c7b60ff484317e274f3b2e8c511b9525f92feb1a..2842048d4eb8548015d25a05499e5079e6e8924c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_s64_x_tied1, svint64_t,
 
 /*
 ** abs_s64_x_untied:
+**     movprfx z0, z1
 **     abs     z0\.d, p0/m, z1\.d
 **     ret
 */
index 0bc64c078a26a68e824d5f8535e6e287274025b2..ec0d89d8ba9c0ba806a6842ca59c632fdd33cdc9 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (abs_s8_x_tied1, svint8_t,
 
 /*
 ** abs_s8_x_untied:
+**     movprfx z0, z1
 **     abs     z0\.b, p0/m, z1\.b
 **     ret
 */
index 7af312397b948ec655bcb231eaec8ec1d7ec39ac..5f82612c97a1c6db14ac69521f80bca6dc53c27d 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cls_s16_z, svuint16_t, svint16_t,
 
 /*
 ** cls_s16_x:
+**     movprfx z0, z4
 **     cls     z0\.h, p0/m, z4\.h
 **     ret
 */
index 813876f6877f44d098b91f38188aa8ea1b4d22ae..0db651f2e290966b057b8b4b48f3b5cd9dd3c0c9 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cls_s32_z, svuint32_t, svint32_t,
 
 /*
 ** cls_s32_x:
+**     movprfx z0, z4
 **     cls     z0\.s, p0/m, z4\.s
 **     ret
 */
index 660a20556c80d5f28b58d99b07c36377efbcd05a..e809e2fb2ab2f0d66f645dda139d1d3db39f9e59 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cls_s64_z, svuint64_t, svint64_t,
 
 /*
 ** cls_s64_x:
+**     movprfx z0, z4
 **     cls     z0\.d, p0/m, z4\.d
 **     ret
 */
index 56f5c26086ff3aa973253db55a3b63ac2295109c..f296c9f932ff320cb8b9a07fbaff9c10e26082db 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cls_s8_z, svuint8_t, svint8_t,
 
 /*
 ** cls_s8_x:
+**     movprfx z0, z4
 **     cls     z0\.b, p0/m, z4\.b
 **     ret
 */
index 58f89005cd5af8ce98b8197d6d9c6dcf8c9eab6b..dc2c4e952f7514dcd0b3fd8fa85ff38d22d9ecbc 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (clz_s16_z, svuint16_t, svint16_t,
 
 /*
 ** clz_s16_x:
+**     movprfx z0, z4
 **     clz     z0\.h, p0/m, z4\.h
 **     ret
 */
index a9198070b580692f9be3defce9c98d8adece68e5..17f54bcd05673610d3f13165540e05771597e58d 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (clz_s32_z, svuint32_t, svint32_t,
 
 /*
 ** clz_s32_x:
+**     movprfx z0, z4
 **     clz     z0\.s, p0/m, z4\.s
 **     ret
 */
index 02c0c993e0b7e3faba2fe6d528dbd71254db176f..a42b730c5641ae1a26b2341e00f97a892c7245c3 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (clz_s64_z, svuint64_t, svint64_t,
 
 /*
 ** clz_s64_x:
+**     movprfx z0, z4
 **     clz     z0\.d, p0/m, z4\.d
 **     ret
 */
index 642d298c8efc940b2b188a8d3cac5b4a0373651e..66c23594f0535a8d365a3f736cee28657679fd2d 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (clz_s8_z, svuint8_t, svint8_t,
 
 /*
 ** clz_s8_x:
+**     movprfx z0, z4
 **     clz     z0\.b, p0/m, z4\.b
 **     ret
 */
index f0872301759bfc7870786818fa42c4aa0a0ef997..ab31f567aee603b94cb38d09d3a4d174f38cd521 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (clz_u16_x_tied1, svuint16_t,
 
 /*
 ** clz_u16_x_untied:
+**     movprfx z0, z1
 **     clz     z0\.h, p0/m, z1\.h
 **     ret
 */
index e0042413162b86bdd05229d63eeffc9af48520c4..2a7440455a82394b638781e0d30d1104ae75a363 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (clz_u32_x_tied1, svuint32_t,
 
 /*
 ** clz_u32_x_untied:
+**     movprfx z0, z1
 **     clz     z0\.s, p0/m, z1\.s
 **     ret
 */
index e879e1b9a6ea4d4bf76e01972b84dc6f39684ee2..8ff73c424098c8f5005ee7697a64e5310c9e7e6e 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (clz_u64_x_tied1, svuint64_t,
 
 /*
 ** clz_u64_x_untied:
+**     movprfx z0, z1
 **     clz     z0\.d, p0/m, z1\.d
 **     ret
 */
index ce6cb8f45172b7a0f1d30ab849d374db5a3e80ec..89d8c54079c9d4bf22df75e4e79187c11c26790b 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (clz_u8_x_tied1, svuint8_t,
 
 /*
 ** clz_u8_x_untied:
+**     movprfx z0, z1
 **     clz     z0\.b, p0/m, z1\.b
 **     ret
 */
index 19d46be68b57cd154b1bd0b50633720646564100..8f047fbbc0a8d75e3db559f8f035f22657415603 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_s16_x_tied1, svint16_t,
 
 /*
 ** cnot_s16_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.h, p0/m, z1\.h
 **     ret
 */
index 041b59a046ccaabf4be8fa88fcaecaa657ea5dde..f5b33959da27c3069099c83b7bcdb4afe1018d35 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_s32_x_tied1, svint32_t,
 
 /*
 ** cnot_s32_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.s, p0/m, z1\.s
 **     ret
 */
index c7135cb9568937a96259474c3bc7577e5b0bbd45..64121e3f0e189b4782048a180f93cb2fe6652ef5 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_s64_x_tied1, svint64_t,
 
 /*
 ** cnot_s64_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.d, p0/m, z1\.d
 **     ret
 */
index 0560f97516b383e6357ebbe9c61b3f80a908e5a1..e5dab42ad5ec2ffa04bf888188423214ea44cd7e 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_s8_x_tied1, svint8_t,
 
 /*
 ** cnot_s8_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.b, p0/m, z1\.b
 **     ret
 */
index 7ea9ff71ded6b856de614772c8a01714f6130723..74c72c9ee0f9da49f94045e433f25d2c7682f7cc 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_u16_x_tied1, svuint16_t,
 
 /*
 ** cnot_u16_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.h, p0/m, z1\.h
 **     ret
 */
index 972c7751eb6bdb925617563430c321aab840ff4c..b0f7531ee08704e3cb6920e299b8151e0fd1b202 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_u32_x_tied1, svuint32_t,
 
 /*
 ** cnot_u32_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.s, p0/m, z1\.s
 **     ret
 */
index f25e001c56934865a1dec04387c31bad0224e3ba..9aa698dfbbf86e80dcbab5be816ff15421a91a52 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_u64_x_tied1, svuint64_t,
 
 /*
 ** cnot_u64_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.d, p0/m, z1\.d
 **     ret
 */
index e135a72956afc336ab762fdb06e43273ca927931..67c46a2dd81d8835fadf19a1e7cb95a85b47cdd4 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnot_u8_x_tied1, svuint8_t,
 
 /*
 ** cnot_u8_x_untied:
+**     movprfx z0, z1
 **     cnot    z0\.b, p0/m, z1\.b
 **     ret
 */
index d92fbc1572d012d129cc05f4ddf748e24fc9fcb1..bebf361283440a3679241cf4d4761fd58a66ef8d 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_bf16_z, svuint16_t, svbfloat16_t,
 
 /*
 ** cnt_bf16_x:
+**     movprfx z0, z4
 **     cnt     z0\.h, p0/m, z4\.h
 **     ret
 */
index b8061bb80dda557a45f2700accc9aad200f462b1..20c95d62121a203a61d73a8e21d613ffd9975382 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_f16_z, svuint16_t, svfloat16_t,
 
 /*
 ** cnt_f16_x:
+**     movprfx z0, z4
 **     cnt     z0\.h, p0/m, z4\.h
 **     ret
 */
index b9292c97709dc1cb92b03ba15ec6abaa52126c2b..8afeb49da4f9bd75a2bb1a1e7053c9a590acb40c 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_f32_z, svuint32_t, svfloat32_t,
 
 /*
 ** cnt_f32_x:
+**     movprfx z0, z4
 **     cnt     z0\.s, p0/m, z4\.s
 **     ret
 */
index 4976ee467a2a38705c98c1ae9397f044102e078a..b7683a97f68e51e1aa60832e453973c700edb4b6 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_f64_z, svuint64_t, svfloat64_t,
 
 /*
 ** cnt_f64_x:
+**     movprfx z0, z4
 **     cnt     z0\.d, p0/m, z4\.d
 **     ret
 */
index a8ff8f3d2cfba2d7527a659a4f8c6f78dca5d885..824c42ad549dacb12be1c594fa16d45dde5b59b7 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_s16_z, svuint16_t, svint16_t,
 
 /*
 ** cnt_s16_x:
+**     movprfx z0, z4
 **     cnt     z0\.h, p0/m, z4\.h
 **     ret
 */
index 3d16041f24e2b43d534e16240b160a13650317fe..d6653d57e00b323044701294b28b80cb717f33ea 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_s32_z, svuint32_t, svint32_t,
 
 /*
 ** cnt_s32_x:
+**     movprfx z0, z4
 **     cnt     z0\.s, p0/m, z4\.s
 **     ret
 */
index 8c8871ba5934a96c1a40045a9b36b6b0614cd9d5..c28db82dc2144f08104690cf6eae0bf7327841d6 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_s64_z, svuint64_t, svint64_t,
 
 /*
 ** cnt_s64_x:
+**     movprfx z0, z4
 **     cnt     z0\.d, p0/m, z4\.d
 **     ret
 */
index 8d85c8e51496fb3ec0e64ee26a74a909cc6b7312..e741b4c93327d54bb76216431212d6a47547b220 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (cnt_s8_z, svuint8_t, svint8_t,
 
 /*
 ** cnt_s8_x:
+**     movprfx z0, z4
 **     cnt     z0\.b, p0/m, z4\.b
 **     ret
 */
index f173d3108f21b971953983595e61f56d4fa2942d..49236cd2cdcd4c1a245a19d46be4a2c0ac954c22 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnt_u16_x_tied1, svuint16_t,
 
 /*
 ** cnt_u16_x_untied:
+**     movprfx z0, z1
 **     cnt     z0\.h, p0/m, z1\.h
 **     ret
 */
index 11969a6b6ed62a5db8d61f79aa4fcd2b798f2eab..d302e323023da39f379cb9290c30733f127f526d 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnt_u32_x_tied1, svuint32_t,
 
 /*
 ** cnt_u32_x_untied:
+**     movprfx z0, z1
 **     cnt     z0\.s, p0/m, z1\.s
 **     ret
 */
index 4eb69ea846e1ef7b1685ab1156c1661b597bb7de..b6e26ba1725cb15a58084f30b19496b6a07f0d1c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnt_u64_x_tied1, svuint64_t,
 
 /*
 ** cnt_u64_x_untied:
+**     movprfx z0, z1
 **     cnt     z0\.d, p0/m, z1\.d
 **     ret
 */
index 30e798302194b5c44f65568dddd9242980306590..464dc4e8c31dc6d15306883122c0ec747406d3a9 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (cnt_u8_x_tied1, svuint8_t,
 
 /*
 ** cnt_u8_x_untied:
+**     movprfx z0, z1
 **     cnt     z0\.b, p0/m, z1\.b
 **     ret
 */
index 52baa1f5881753ef797be2faf7ac71614edff1e0..d4f9150728a8af4ef0c1312274a8abb68ea788c3 100644 (file)
@@ -66,6 +66,7 @@ TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
 
 /*
 ** cvt_bf16_f32_x_untied:
+**     movprfx z0, z4
 **     bfcvt   z0\.h, p0/m, z4\.s
 **     ret
 */
index 5dcd480464bc02875880cf08f53fbab7b53098ab..dbb042d46dfd8e352a8d071288e4fcbe4ad574bd 100644 (file)
@@ -421,6 +421,7 @@ TEST_DUAL_Z_REV (cvt_f16_f32_x_tied1, svfloat16_t, svfloat32_t,
 
 /*
 ** cvt_f16_f32_x_untied:
+**     movprfx z0, z4
 **     fcvt    z0\.h, p0/m, z4\.s
 **     ret
 */
@@ -439,6 +440,7 @@ TEST_DUAL_Z_REV (cvt_f16_f64_x_tied1, svfloat16_t, svfloat64_t,
 
 /*
 ** cvt_f16_f64_x_untied:
+**     movprfx z0, z4
 **     fcvt    z0\.h, p0/m, z4\.d
 **     ret
 */
@@ -457,6 +459,7 @@ TEST_DUAL_Z_REV (cvt_f16_s16_x_tied1, svfloat16_t, svint16_t,
 
 /*
 ** cvt_f16_s16_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.h, p0/m, z4\.h
 **     ret
 */
@@ -475,6 +478,7 @@ TEST_DUAL_Z_REV (cvt_f16_s32_x_tied1, svfloat16_t, svint32_t,
 
 /*
 ** cvt_f16_s32_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.h, p0/m, z4\.s
 **     ret
 */
@@ -493,6 +497,7 @@ TEST_DUAL_Z_REV (cvt_f16_s64_x_tied1, svfloat16_t, svint64_t,
 
 /*
 ** cvt_f16_s64_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.h, p0/m, z4\.d
 **     ret
 */
@@ -511,6 +516,7 @@ TEST_DUAL_Z_REV (cvt_f16_u16_x_tied1, svfloat16_t, svuint16_t,
 
 /*
 ** cvt_f16_u16_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.h, p0/m, z4\.h
 **     ret
 */
@@ -529,6 +535,7 @@ TEST_DUAL_Z_REV (cvt_f16_u32_x_tied1, svfloat16_t, svuint32_t,
 
 /*
 ** cvt_f16_u32_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.h, p0/m, z4\.s
 **     ret
 */
@@ -547,6 +554,7 @@ TEST_DUAL_Z_REV (cvt_f16_u64_x_tied1, svfloat16_t, svuint64_t,
 
 /*
 ** cvt_f16_u64_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.h, p0/m, z4\.d
 **     ret
 */
index c1646993996d0f602252cfffd46794de00c426a1..f7bfe57ada4c5c6a152c497b9e997e73118264f4 100644 (file)
@@ -319,6 +319,7 @@ TEST_DUAL_Z_REV (cvt_f32_f16_x_tied1, svfloat32_t, svfloat16_t,
 
 /*
 ** cvt_f32_f16_x_untied:
+**     movprfx z0, z4
 **     fcvt    z0\.s, p0/m, z4\.h
 **     ret
 */
@@ -337,6 +338,7 @@ TEST_DUAL_Z_REV (cvt_f32_f64_x_tied1, svfloat32_t, svfloat64_t,
 
 /*
 ** cvt_f32_f64_x_untied:
+**     movprfx z0, z4
 **     fcvt    z0\.s, p0/m, z4\.d
 **     ret
 */
@@ -355,6 +357,7 @@ TEST_DUAL_Z_REV (cvt_f32_s32_x_tied1, svfloat32_t, svint32_t,
 
 /*
 ** cvt_f32_s32_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.s, p0/m, z4\.s
 **     ret
 */
@@ -373,6 +376,7 @@ TEST_DUAL_Z_REV (cvt_f32_s64_x_tied1, svfloat32_t, svint64_t,
 
 /*
 ** cvt_f32_s64_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.s, p0/m, z4\.d
 **     ret
 */
@@ -391,6 +395,7 @@ TEST_DUAL_Z_REV (cvt_f32_u32_x_tied1, svfloat32_t, svuint32_t,
 
 /*
 ** cvt_f32_u32_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.s, p0/m, z4\.s
 **     ret
 */
@@ -409,6 +414,7 @@ TEST_DUAL_Z_REV (cvt_f32_u64_x_tied1, svfloat32_t, svuint64_t,
 
 /*
 ** cvt_f32_u64_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.s, p0/m, z4\.d
 **     ret
 */
index 1d08e6ec503d17b0a84797d005613546c464006d..bfa36baf2807dc8d9b83e3d13afb81376edf85c9 100644 (file)
@@ -319,6 +319,7 @@ TEST_DUAL_Z_REV (cvt_f64_f16_x_tied1, svfloat64_t, svfloat16_t,
 
 /*
 ** cvt_f64_f16_x_untied:
+**     movprfx z0, z4
 **     fcvt    z0\.d, p0/m, z4\.h
 **     ret
 */
@@ -337,6 +338,7 @@ TEST_DUAL_Z_REV (cvt_f64_f32_x_tied1, svfloat64_t, svfloat32_t,
 
 /*
 ** cvt_f64_f32_x_untied:
+**     movprfx z0, z4
 **     fcvt    z0\.d, p0/m, z4\.s
 **     ret
 */
@@ -355,6 +357,7 @@ TEST_DUAL_Z_REV (cvt_f64_s32_x_tied1, svfloat64_t, svint32_t,
 
 /*
 ** cvt_f64_s32_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.d, p0/m, z4\.s
 **     ret
 */
@@ -373,6 +376,7 @@ TEST_DUAL_Z_REV (cvt_f64_s64_x_tied1, svfloat64_t, svint64_t,
 
 /*
 ** cvt_f64_s64_x_untied:
+**     movprfx z0, z4
 **     scvtf   z0\.d, p0/m, z4\.d
 **     ret
 */
@@ -391,6 +395,7 @@ TEST_DUAL_Z_REV (cvt_f64_u32_x_tied1, svfloat64_t, svuint32_t,
 
 /*
 ** cvt_f64_u32_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.d, p0/m, z4\.s
 **     ret
 */
@@ -409,6 +414,7 @@ TEST_DUAL_Z_REV (cvt_f64_u64_x_tied1, svfloat64_t, svuint64_t,
 
 /*
 ** cvt_f64_u64_x_untied:
+**     movprfx z0, z4
 **     ucvtf   z0\.d, p0/m, z4\.d
 **     ret
 */
index 81761ab092cb5fdcc1e861fa1538d6fccc2759d5..6b6883be8dbf005de689ebc2a65bdb20a84cc59e 100644 (file)
@@ -64,6 +64,7 @@ TEST_DUAL_Z_REV (cvt_s16_f16_x_tied1, svint16_t, svfloat16_t,
 
 /*
 ** cvt_s16_f16_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.h, p0/m, z4\.h
 **     ret
 */
index d30da5cc53a184605c13e3be62f38ac008610fc8..bf87356d505c7765a26109e1c6b115a4e8bd95af 100644 (file)
@@ -166,6 +166,7 @@ TEST_DUAL_Z_REV (cvt_s32_f16_x_tied1, svint32_t, svfloat16_t,
 
 /*
 ** cvt_s32_f16_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.s, p0/m, z4\.h
 **     ret
 */
@@ -184,6 +185,7 @@ TEST_DUAL_Z_REV (cvt_s32_f32_x_tied1, svint32_t, svfloat32_t,
 
 /*
 ** cvt_s32_f32_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.s, p0/m, z4\.s
 **     ret
 */
@@ -202,6 +204,7 @@ TEST_DUAL_Z_REV (cvt_s32_f64_x_tied1, svint32_t, svfloat64_t,
 
 /*
 ** cvt_s32_f64_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.s, p0/m, z4\.d
 **     ret
 */
index 68cd80784deba6891bae5e6cedf12b3e530c8b82..9be3e05386fe2c984da95688458ce500c30945b8 100644 (file)
@@ -166,6 +166,7 @@ TEST_DUAL_Z_REV (cvt_s64_f16_x_tied1, svint64_t, svfloat16_t,
 
 /*
 ** cvt_s64_f16_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.d, p0/m, z4\.h
 **     ret
 */
@@ -184,6 +185,7 @@ TEST_DUAL_Z_REV (cvt_s64_f32_x_tied1, svint64_t, svfloat32_t,
 
 /*
 ** cvt_s64_f32_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.d, p0/m, z4\.s
 **     ret
 */
@@ -202,6 +204,7 @@ TEST_DUAL_Z_REV (cvt_s64_f64_x_tied1, svint64_t, svfloat64_t,
 
 /*
 ** cvt_s64_f64_x_untied:
+**     movprfx z0, z4
 **     fcvtzs  z0\.d, p0/m, z4\.d
 **     ret
 */
index 4db0dffdd97798d61a75b06af0bbf4bdbece8676..33a608b01fbb1f2a29c649284673f2695f5a6fe6 100644 (file)
@@ -64,6 +64,7 @@ TEST_DUAL_Z_REV (cvt_u16_f16_x_tied1, svuint16_t, svfloat16_t,
 
 /*
 ** cvt_u16_f16_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.h, p0/m, z4\.h
 **     ret
 */
index 52ef49fcf09264d73bdb187a3e6d62383d99c691..4791d2798fc31a3bc5cb231ca1720eea3cbd9bda 100644 (file)
@@ -166,6 +166,7 @@ TEST_DUAL_Z_REV (cvt_u32_f16_x_tied1, svuint32_t, svfloat16_t,
 
 /*
 ** cvt_u32_f16_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.s, p0/m, z4\.h
 **     ret
 */
@@ -184,6 +185,7 @@ TEST_DUAL_Z_REV (cvt_u32_f32_x_tied1, svuint32_t, svfloat32_t,
 
 /*
 ** cvt_u32_f32_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.s, p0/m, z4\.s
 **     ret
 */
@@ -202,6 +204,7 @@ TEST_DUAL_Z_REV (cvt_u32_f64_x_tied1, svuint32_t, svfloat64_t,
 
 /*
 ** cvt_u32_f64_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.s, p0/m, z4\.d
 **     ret
 */
index 0c43758aeb44a9a1c586ba0a1045a1f0df9a3cc1..e6c10c19b658fc2eb802608f2910b6048242d180 100644 (file)
@@ -166,6 +166,7 @@ TEST_DUAL_Z_REV (cvt_u64_f16_x_tied1, svuint64_t, svfloat16_t,
 
 /*
 ** cvt_u64_f16_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.d, p0/m, z4\.h
 **     ret
 */
@@ -184,6 +185,7 @@ TEST_DUAL_Z_REV (cvt_u64_f32_x_tied1, svuint64_t, svfloat32_t,
 
 /*
 ** cvt_u64_f32_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.d, p0/m, z4\.s
 **     ret
 */
@@ -202,6 +204,7 @@ TEST_DUAL_Z_REV (cvt_u64_f64_x_tied1, svuint64_t, svfloat64_t,
 
 /*
 ** cvt_u64_f64_x_untied:
+**     movprfx z0, z4
 **     fcvtzu  z0\.d, p0/m, z4\.d
 **     ret
 */
index 32e836f013bca5c54aea54f75f0ae8cae736c58a..76c71437dc1b4409fbae4bed204f1e4e26a69a31 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (extb_s16_x_tied1, svint16_t,
 
 /*
 ** extb_s16_x_untied:
+**     movprfx z0, z1
 **     sxtb    z0\.h, p0/m, z1\.h
 **     ret
 */
index e2f13f41cf5708a388971c98ca1359183aecbfd9..084c1c19b32ebf5365c9f249daf1452434fc416c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (extb_s32_x_tied1, svint32_t,
 
 /*
 ** extb_s32_x_untied:
+**     movprfx z0, z1
 **     sxtb    z0\.s, p0/m, z1\.s
 **     ret
 */
index 83363efdb7f7e1834cc7c56db6f117cfa14e251b..8f3ee8d053bb31457af8e633d064fc0e1524517c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (extb_s64_x_tied1, svint64_t,
 
 /*
 ** extb_s64_x_untied:
+**     movprfx z0, z1
 **     sxtb    z0\.d, p0/m, z1\.d
 **     ret
 */
index 3bb0bf31f20680c14ae0489a8449998ffdf09b56..d15cf7a62cd24d1bf1f2857b800e63253d11538e 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (exth_s32_x_tied1, svint32_t,
 
 /*
 ** exth_s32_x_untied:
+**     movprfx z0, z1
 **     sxth    z0\.s, p0/m, z1\.s
 **     ret
 */
index 0718b67ad149c24b63624e3cc077b7f9e6d93ad0..d8adf52efa20f5814845f073fda19316f92eea97 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (exth_s64_x_tied1, svint64_t,
 
 /*
 ** exth_s64_x_untied:
+**     movprfx z0, z1
 **     sxth    z0\.d, p0/m, z1\.d
 **     ret
 */
index a6edadfa75caa01da20386510d263cf82accd9c1..978a622e09f80afd3f22045533a0aa5b41246e3a 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (extw_s64_x_tied1, svint64_t,
 
 /*
 ** extw_s64_x_untied:
+**     movprfx z0, z1
 **     sxtw    z0\.d, p0/m, z1\.d
 **     ret
 */
index c31eba922189bf0573516020c8ffb484181e6c47..c43c6eb7a196e9baf69a52a7080262d5d0545618 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_f16_x_tied1, svfloat16_t,
 
 /*
 ** neg_f16_x_untied:
+**     movprfx z0, z1
 **     fneg    z0\.h, p0/m, z1\.h
 **     ret
 */
index a57d264ad559a566344f84cedae99dc31bf406ad..3e9fd5b46f57bb4fa0e945019a09f4fe985dc796 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_f32_x_tied1, svfloat32_t,
 
 /*
 ** neg_f32_x_untied:
+**     movprfx z0, z1
 **     fneg    z0\.s, p0/m, z1\.s
 **     ret
 */
index 90cadd4f9694e896a1977b5586b7f5a5a9e36673..880f5e8867f61868936c6842525809c5146ba30a 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_f64_x_tied1, svfloat64_t,
 
 /*
 ** neg_f64_x_untied:
+**     movprfx z0, z1
 **     fneg    z0\.d, p0/m, z1\.d
 **     ret
 */
index 80b2ee0f7ac173adbf7d5a2e592b6c4ea9e49501..6a43bb20c37fa6dcce0ed06cbe2b9a3d11836733 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_s16_x_tied1, svint16_t,
 
 /*
 ** neg_s16_x_untied:
+**     movprfx z0, z1
 **     neg     z0\.h, p0/m, z1\.h
 **     ret
 */
index b8805034eb90582cbd4809f469aedd481e6ee1f7..ea92412b5f8fe5f9b27958056ad39ad80aa7d445 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_s32_x_tied1, svint32_t,
 
 /*
 ** neg_s32_x_untied:
+**     movprfx z0, z1
 **     neg     z0\.s, p0/m, z1\.s
 **     ret
 */
index 82abe672350be6974cf660c2a83ac4f51d8d6f47..911d1f3db16b5f18dac8839f8e8017a38dee9ca8 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_s64_x_tied1, svint64_t,
 
 /*
 ** neg_s64_x_untied:
+**     movprfx z0, z1
 **     neg     z0\.d, p0/m, z1\.d
 **     ret
 */
index b7c9949ad1eadb7b615e983b2db7433fbb2f2e9a..ace74b747b2dc7b3df85b626fd68d41e63fcc809 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (neg_s8_x_tied1, svint8_t,
 
 /*
 ** neg_s8_x_untied:
+**     movprfx z0, z1
 **     neg     z0\.b, p0/m, z1\.b
 **     ret
 */
index bacd6b12cc21591cf40fe1787e0ac8bd8ef449be..9cafba96ea5a41bfc65f4077473f24af67093760 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_s16_x_tied1, svint16_t,
 
 /*
 ** not_s16_x_untied:
+**     movprfx z0, z1
 **     not     z0\.h, p0/m, z1\.h
 **     ret
 */
index 8b15d6e91c85527c748ef37922c939d50143442d..2185b783125181f41a6a667a0b46fdb5cdb5af5b 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_s32_x_tied1, svint32_t,
 
 /*
 ** not_s32_x_untied:
+**     movprfx z0, z1
 **     not     z0\.s, p0/m, z1\.s
 **     ret
 */
index 8e7f7b9e876db46a82a8c70ca1f7b4e63c86af06..09b3c2558527014f1ea70abae800846827b960e2 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_s64_x_tied1, svint64_t,
 
 /*
 ** not_s64_x_untied:
+**     movprfx z0, z1
 **     not     z0\.d, p0/m, z1\.d
 **     ret
 */
index e807f08f81097b0b70398b003da520b37cc6d72a..029909e5cfd8f81c453d103f00efa950da056233 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_s8_x_tied1, svint8_t,
 
 /*
 ** not_s8_x_untied:
+**     movprfx z0, z1
 **     not     z0\.b, p0/m, z1\.b
 **     ret
 */
index c812005f11819860afb73edcc35c2f71e1c6a555..fc33c99fffb3e214147164c883508f9c4d0244c6 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_u16_x_tied1, svuint16_t,
 
 /*
 ** not_u16_x_untied:
+**     movprfx z0, z1
 **     not     z0\.h, p0/m, z1\.h
 **     ret
 */
index 7b7e9ca2189d45e2f66ab1e28525c472945b2ff2..3f5e822ac94fe8b7402a4e19be10f2fe2c330110 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_u32_x_tied1, svuint32_t,
 
 /*
 ** not_u32_x_untied:
+**     movprfx z0, z1
 **     not     z0\.s, p0/m, z1\.s
 **     ret
 */
index 27b92ad84d46afcd08ec7d5412583047a430f5d9..01dde36ec43ba2db23c7b5341e20aba7c63b0919 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_u64_x_tied1, svuint64_t,
 
 /*
 ** not_u64_x_untied:
+**     movprfx z0, z1
 **     not     z0\.d, p0/m, z1\.d
 **     ret
 */
index bd2f36cade8043a17c68a7118b5bd64c8263bdae..e8553e3935ca99cd3ce29a69d7769393f701a970 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (not_u8_x_tied1, svuint8_t,
 
 /*
 ** not_u8_x_untied:
+**     movprfx z0, z1
 **     not     z0\.b, p0/m, z1\.b
 **     ret
 */
index 4f794f60074ca33ed7a6e68e8bca3efe5fa64f2d..5889c92ff6d10f198fb093bf0abeb4136e0d39d2 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_s16_x_tied1, svint16_t,
 
 /*
 ** rbit_s16_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.h, p0/m, z1\.h
 **     ret
 */
index 8b5e1a463a89925b59773ca2784f665627faff63..1414e3e35ecdff3fddaf7b3770347126c42eab1d 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_s32_x_tied1, svint32_t,
 
 /*
 ** rbit_s32_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.s, p0/m, z1\.s
 **     ret
 */
index cec27a42182aec658234d203869ea29e712c2ae6..3b76f5483a02c19d0d92aab5f40e5a8b3c0fd111 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_s64_x_tied1, svint64_t,
 
 /*
 ** rbit_s64_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.d, p0/m, z1\.d
 **     ret
 */
index 9c152116acfcd6a213ab06ef118ad0de429e3faa..1fc80e34ce74159a891652933ef6ef5eb4954c66 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_s8_x_tied1, svint8_t,
 
 /*
 ** rbit_s8_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.b, p0/m, z1\.b
 **     ret
 */
index 001ef2bf0756dfabc04136ae184a882edecf4795..647933723472bc4c3397db8d4f05bf5555246f92 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_u16_x_tied1, svuint16_t,
 
 /*
 ** rbit_u16_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.h, p0/m, z1\.h
 **     ret
 */
index 4d91e954d7d6d0e3300205fb6546d7e91a1f8b65..3e959642a33420cd158dbcb96ea63e59612c0806 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_u32_x_tied1, svuint32_t,
 
 /*
 ** rbit_u32_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.s, p0/m, z1\.s
 **     ret
 */
index 77f88d116a1653c3ac7b669bd2c833fa61ba0f7c..5163b82b35c7ab354d915569d28ea4d0f9a1be1c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_u64_x_tied1, svuint64_t,
 
 /*
 ** rbit_u64_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.d, p0/m, z1\.d
 **     ret
 */
index fa347e4c7e3206fa032f6ffbf7e51a75ff95d8c9..2372398c7e14751424a717feee47d5e19539d3c0 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rbit_u8_x_tied1, svuint8_t,
 
 /*
 ** rbit_u8_x_untied:
+**     movprfx z0, z1
 **     rbit    z0\.b, p0/m, z1\.b
 **     ret
 */
index 2dd7ada2c2111a74428df28182a6108147d8259e..da63f267dd3238dde08db95cc6972dcba4485a31 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (recpx_f16_x_tied1, svfloat16_t,
 
 /*
 ** recpx_f16_x_untied:
+**     movprfx z0, z1
 **     frecpx  z0\.h, p0/m, z1\.h
 **     ret
 */
index 6364fb83ba3231c7b4dbedfb83961a6560e3a827..ea8cb785367b2474c4626aeab728892e337b6ea4 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (recpx_f32_x_tied1, svfloat32_t,
 
 /*
 ** recpx_f32_x_untied:
+**     movprfx z0, z1
 **     frecpx  z0\.s, p0/m, z1\.s
 **     ret
 */
index ca5232331db7abeb61f69584219b19e74f941e7d..1eaca67a2d23d59578711f35019488b711fd171f 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (recpx_f64_x_tied1, svfloat64_t,
 
 /*
 ** recpx_f64_x_untied:
+**     movprfx z0, z1
 **     frecpx  z0\.d, p0/m, z1\.d
 **     ret
 */
index ecfabe668eefbdf27c0fe58a334744c970f2ca19..a99260f0f30e12e251ef174b89684dcbe9eaf6da 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revb_s16_x_tied1, svint16_t,
 
 /*
 ** revb_s16_x_untied:
+**     movprfx z0, z1
 **     revb    z0\.h, p0/m, z1\.h
 **     ret
 */
index a46a819737a803ed1c308f603291998ab5315c8d..adbf1286129ac602a7602a8f1e941fc82d9d7bc1 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revb_s32_x_tied1, svint32_t,
 
 /*
 ** revb_s32_x_untied:
+**     movprfx z0, z1
 **     revb    z0\.s, p0/m, z1\.s
 **     ret
 */
index 21547238c756cf7cc142cd53bcaca11455918a52..d21db75bf20cbb00496630d34c21460f35b8621c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revb_s64_x_tied1, svint64_t,
 
 /*
 ** revb_s64_x_untied:
+**     movprfx z0, z1
 **     revb    z0\.d, p0/m, z1\.d
 **     ret
 */
index d58bd3d740981417f8eed41f61b5ec3c005d0256..d48704f819ddbc9b5f80fc42befe9f46fd8c2d3e 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revb_u16_x_tied1, svuint16_t,
 
 /*
 ** revb_u16_x_untied:
+**     movprfx z0, z1
 **     revb    z0\.h, p0/m, z1\.h
 **     ret
 */
index 33df990d55f045c6d7419b6465e3ac77c3dc2dc4..cf9293bfb33284a8408be027a3d1eda50feab658 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revb_u32_x_tied1, svuint32_t,
 
 /*
 ** revb_u32_x_untied:
+**     movprfx z0, z1
 **     revb    z0\.s, p0/m, z1\.s
 **     ret
 */
index 50ad618cc1a5b2e827cff878c209894b9f4b5f69..54db72dab2f4fffdfb582b30e4f282b44c2af6f7 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revb_u64_x_tied1, svuint64_t,
 
 /*
 ** revb_u64_x_untied:
+**     movprfx z0, z1
 **     revb    z0\.d, p0/m, z1\.d
 **     ret
 */
index 07d512ddb757827c7ac25c3cc62a45e0d50f4c66..fb63c17d70235482c80cf78131f2e5ce015cfeba 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revh_s32_x_tied1, svint32_t,
 
 /*
 ** revh_s32_x_untied:
+**     movprfx z0, z1
 **     revh    z0\.s, p0/m, z1\.s
 **     ret
 */
index b1446347c0f71f00550ee2216cf2ec3d60741fe5..967600ad6236597668f899e3ce7b34fdfea0f007 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revh_s64_x_tied1, svint64_t,
 
 /*
 ** revh_s64_x_untied:
+**     movprfx z0, z1
 **     revh    z0\.d, p0/m, z1\.d
 **     ret
 */
index 9ea51884d1aea2f3f4f2a1b4ec675bcb7c05707c..265f865b57f492ba11d198c2f693d05e70ae27d0 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revh_u32_x_tied1, svuint32_t,
 
 /*
 ** revh_u32_x_untied:
+**     movprfx z0, z1
 **     revh    z0\.s, p0/m, z1\.s
 **     ret
 */
index 7b2da2701c0fc53117a28a85bcc3d34f71bfd60a..733b229b9ecfecc2b7c2b9be41ae0f0dad650393 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revh_u64_x_tied1, svuint64_t,
 
 /*
 ** revh_u64_x_untied:
+**     movprfx z0, z1
 **     revh    z0\.d, p0/m, z1\.d
 **     ret
 */
index 26ca0f0bd521aa9e9080ff419d6877a595b059c7..08941314c5a388ceebb6ce703bfbb7acf70221f7 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revw_s64_x_tied1, svint64_t,
 
 /*
 ** revw_s64_x_untied:
+**     movprfx z0, z1
 **     revw    z0\.d, p0/m, z1\.d
 **     ret
 */
index c70cdb428bada40f63273674bd94cfc9b022da37..ebde929b2c61ab438f3b08c58e7d18c145a93526 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (revw_u64_x_tied1, svuint64_t,
 
 /*
 ** revw_u64_x_untied:
+**     movprfx z0, z1
 **     revw    z0\.d, p0/m, z1\.d
 **     ret
 */
index 99a604209425c14f094a9064d3784afd4976e23a..3e1a788045c3f4b13d0a584773c1647c6f120152 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rinta_f16_x_tied1, svfloat16_t,
 
 /*
 ** rinta_f16_x_untied:
+**     movprfx z0, z1
 **     frinta  z0\.h, p0/m, z1\.h
 **     ret
 */
index b4e3714bc4e30b949c81434fcd235c17b5c6971c..ae6fe659cbebc4d13febbf851a2920a4363ebfed 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rinta_f32_x_tied1, svfloat32_t,
 
 /*
 ** rinta_f32_x_untied:
+**     movprfx z0, z1
 **     frinta  z0\.s, p0/m, z1\.s
 **     ret
 */
index 24d6b7dc8b258df72062510353870a21ffce2e6f..2f7be6c46de3bc43101ba41de2c378f45e76cda5 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rinta_f64_x_tied1, svfloat64_t,
 
 /*
 ** rinta_f64_x_untied:
+**     movprfx z0, z1
 **     frinta  z0\.d, p0/m, z1\.d
 **     ret
 */
index 1f0ac85e33a4e7a4a4f9a1072c8eddf9c4c0d298..ec3b908f9fc11e992a9ba0d1e0bc070938b6fcd5 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rinti_f16_x_tied1, svfloat16_t,
 
 /*
 ** rinti_f16_x_untied:
+**     movprfx z0, z1
 **     frinti  z0\.h, p0/m, z1\.h
 **     ret
 */
index cf54fde5c36a26c87fe15d466ed8cda66b3adb4f..061f5c8253db4fc13c1217c397cdfb99dbbdb299 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rinti_f32_x_tied1, svfloat32_t,
 
 /*
 ** rinti_f32_x_untied:
+**     movprfx z0, z1
 **     frinti  z0\.s, p0/m, z1\.s
 **     ret
 */
index 08b861caa1eb7bd22cd28ba68ab8591cac31d616..eca3be0816ef417fedc177de7f9afa0d730c8e22 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rinti_f64_x_tied1, svfloat64_t,
 
 /*
 ** rinti_f64_x_untied:
+**     movprfx z0, z1
 **     frinti  z0\.d, p0/m, z1\.d
 **     ret
 */
index 194d01cbd0bcd0b0b59d8f2f6f124f9fa1614d3c..35cb97610d09f6171f265d89ad848f3617628e28 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintm_f16_x_tied1, svfloat16_t,
 
 /*
 ** rintm_f16_x_untied:
+**     movprfx z0, z1
 **     frintm  z0\.h, p0/m, z1\.h
 **     ret
 */
index 6c3297aa1a3e101384fbc454227468f801325ea8..d65baf562c5384d9c1de1c713c882b816e02ffaa 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintm_f32_x_tied1, svfloat32_t,
 
 /*
 ** rintm_f32_x_untied:
+**     movprfx z0, z1
 **     frintm  z0\.s, p0/m, z1\.s
 **     ret
 */
index ecbb2444766ab46e52ac0cc30b16cfdf12944e61..d3824ecd3bd52023600b2a9bba5321a987600dd4 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintm_f64_x_tied1, svfloat64_t,
 
 /*
 ** rintm_f64_x_untied:
+**     movprfx z0, z1
 **     frintm  z0\.d, p0/m, z1\.d
 **     ret
 */
index 273307ef1347bcbf40a2114d788ef61fd696b0bb..cc2bf0ee281db84d918b93349a70f85dec5c83e8 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintn_f16_x_tied1, svfloat16_t,
 
 /*
 ** rintn_f16_x_untied:
+**     movprfx z0, z1
 **     frintn  z0\.h, p0/m, z1\.h
 **     ret
 */
index bafd43106d1e36f94f51881b4043ed66c1a72b6a..aa0c65acdaa6e731a7a7dcd8fa4a43395e8ebf62 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintn_f32_x_tied1, svfloat32_t,
 
 /*
 ** rintn_f32_x_untied:
+**     movprfx z0, z1
 **     frintn  z0\.s, p0/m, z1\.s
 **     ret
 */
index 0142315e6957e0dd33143a7796385b3adcacab4c..a9317adec15b6b535aafb7a0d0d030343c29cf67 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintn_f64_x_tied1, svfloat64_t,
 
 /*
 ** rintn_f64_x_untied:
+**     movprfx z0, z1
 **     frintn  z0\.d, p0/m, z1\.d
 **     ret
 */
index 0e85c34481ac0cf085ed58aeae8d30e3ec874b32..f511452e9a72783634b5b355122dc2ce3ed497fe 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintp_f16_x_tied1, svfloat16_t,
 
 /*
 ** rintp_f16_x_untied:
+**     movprfx z0, z1
 **     frintp  z0\.h, p0/m, z1\.h
 **     ret
 */
index cec360d7cce003d7010d7bedac69dfb97c9afd4a..34596c4b07f9f1bb4786c239ec32f6ee976f5e81 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintp_f32_x_tied1, svfloat32_t,
 
 /*
 ** rintp_f32_x_untied:
+**     movprfx z0, z1
 **     frintp  z0\.s, p0/m, z1\.s
 **     ret
 */
index 1305fb6823f4fc9d6f177f536105254ffae42279..a68a5791bbc9d6543ef9ec652cea28282dfaac35 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintp_f64_x_tied1, svfloat64_t,
 
 /*
 ** rintp_f64_x_untied:
+**     movprfx z0, z1
 **     frintp  z0\.d, p0/m, z1\.d
 **     ret
 */
index 96f7f2c720656c55301f3575fda6964fc118471d..a86e0630d3a24ce6256df439b6145deaedb3cb24 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintx_f16_x_tied1, svfloat16_t,
 
 /*
 ** rintx_f16_x_untied:
+**     movprfx z0, z1
 **     frintx  z0\.h, p0/m, z1\.h
 **     ret
 */
index 1c42d2a9480ee6500c4203a5be4d29ff806bdcfd..956515025c3f6ba7d1eeab83fbe7206bbdb25ec6 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintx_f32_x_tied1, svfloat32_t,
 
 /*
 ** rintx_f32_x_untied:
+**     movprfx z0, z1
 **     frintx  z0\.s, p0/m, z1\.s
 **     ret
 */
index bee806b3bee4c61ea63b94d1cf044df4f4f1bb86..a5c7a01ac773087533b0bb5de4422b0a93af35fc 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintx_f64_x_tied1, svfloat64_t,
 
 /*
 ** rintx_f64_x_untied:
+**     movprfx z0, z1
 **     frintx  z0\.d, p0/m, z1\.d
 **     ret
 */
index be13d82b4a3e7f9fe32f25d6a68e260eeb760f39..cb61080db286c7f127bb441405d6790e010db5af 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintz_f16_x_tied1, svfloat16_t,
 
 /*
 ** rintz_f16_x_untied:
+**     movprfx z0, z1
 **     frintz  z0\.h, p0/m, z1\.h
 **     ret
 */
index 873c0d468aec2ef6451fd754432d33f939393da5..a479909b96e2427fe9dc68185da1f0f97decd3fe 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintz_f32_x_tied1, svfloat32_t,
 
 /*
 ** rintz_f32_x_untied:
+**     movprfx z0, z1
 **     frintz  z0\.s, p0/m, z1\.s
 **     ret
 */
index e6c9d1fc86f68a5c0ce8f04b027ef9a4d7dc4a34..f80f9078263af75b1f612d22b611e5ea3b677661 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rintz_f64_x_tied1, svfloat64_t,
 
 /*
 ** rintz_f64_x_untied:
+**     movprfx z0, z1
 **     frintz  z0\.d, p0/m, z1\.d
 **     ret
 */
index 6dc5940fb9b1cf6029b97dcd50b596a3ffa1eef0..335fb86bc9d33143dfc150f03948ccd1773d6e11 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (sqrt_f16_x_tied1, svfloat16_t,
 
 /*
 ** sqrt_f16_x_untied:
+**     movprfx z0, z1
 **     fsqrt   z0\.h, p0/m, z1\.h
 **     ret
 */
index 71d1f8f74e4a07512e7b945abd02489314be4d2b..0887996799db0c86951393a89b4fd2c56883471c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (sqrt_f32_x_tied1, svfloat32_t,
 
 /*
 ** sqrt_f32_x_untied:
+**     movprfx z0, z1
 **     fsqrt   z0\.s, p0/m, z1\.s
 **     ret
 */
index 7771df545db6a0ca6f2cd96a934fee841b429dd6..7dbab87919dc92e740cfad203334d544b7d08583 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (sqrt_f64_x_tied1, svfloat64_t,
 
 /*
 ** sqrt_f64_x_untied:
+**     movprfx z0, z1
 **     fsqrt   z0\.d, p0/m, z1\.d
 **     ret
 */
index 911defafd7a03cc3fbbee7bd8951fa02d8acd8b0..f66fa901340cdb8e6d27ef4772d402950a44e2c1 100644 (file)
@@ -42,7 +42,13 @@ TEST_DUAL_Z_REV (cvtlt_f32_f16_x_tied1, svfloat32_t, svfloat16_t,
 
 /*
 ** cvtlt_f32_f16_x_untied:
-**     fcvtlt  z0\.s, p0/m, z4\.h
+** (
+**     mov     z0\.d, z4\.d
+**     fcvtlt  z0\.s, p0/m, z0\.h
+** |
+**     fcvtlt  z4\.s, p0/m, z4\.h
+**     mov     z0\.d, z4\.d
+** )
 **     ret
 */
 TEST_DUAL_Z (cvtlt_f32_f16_x_untied, svfloat32_t, svfloat16_t,
index c34947be2b4b6bf9c1cbe202181c2480889bc0d1..b262e2533cf7d7156b5e08d667e518fc4c770225 100644 (file)
@@ -42,7 +42,13 @@ TEST_DUAL_Z_REV (cvtlt_f64_f32_x_tied1, svfloat64_t, svfloat32_t,
 
 /*
 ** cvtlt_f64_f32_x_untied:
-**     fcvtlt  z0\.d, p0/m, z4\.s
+** (
+**     mov     z0\.d, z4\.d
+**     fcvtlt  z0\.d, p0/m, z0\.s
+** |
+**     fcvtlt  z4\.d, p0/m, z4\.s
+**     mov     z0\.d, z4\.d
+** )
 **     ret
 */
 TEST_DUAL_Z (cvtlt_f64_f32_x_untied, svfloat64_t, svfloat32_t,
index 21724c833dca0d74f3aedf148cbc849006b4f708..85fbc79387582f6a5938e106b71e8b65764a8e0c 100644 (file)
@@ -64,6 +64,7 @@ TEST_DUAL_Z_REV (cvtx_f32_f64_x_tied1, svfloat32_t, svfloat64_t,
 
 /*
 ** cvtx_f32_f64_x_untied:
+**     movprfx z0, z4
 **     fcvtx   z0\.s, p0/m, z4\.d
 **     ret
 */
index bc6815690e8763757c7ad39bc3499dd0cabf7805..fe65e640facb1b5e7d7ef33341d4440bba012c72 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (logb_f16_z, svint16_t, svfloat16_t,
 
 /*
 ** logb_f16_x:
+**     movprfx z0, z4
 **     flogb   z0\.h, p0/m, z4\.h
 **     ret
 */
index 35bdcd17b346b3244247699ba42cbd74a61a9ec5..847e1b13507da3064ca0a4f2bb5484da9ad6b0aa 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (logb_f32_z, svint32_t, svfloat32_t,
 
 /*
 ** logb_f32_x:
+**     movprfx z0, z4
 **     flogb   z0\.s, p0/m, z4\.s
 **     ret
 */
index c7c2cb236ea09192df68fe3c17c64257a9d8b6dc..4113a37a6120e4ac7178b72c3fd7d8f2a72ee0d5 100644 (file)
@@ -33,6 +33,7 @@ TEST_DUAL_Z (logb_f64_z, svint64_t, svfloat64_t,
 
 /*
 ** logb_f64_x:
+**     movprfx z0, z4
 **     flogb   z0\.d, p0/m, z4\.d
 **     ret
 */
index 07564882a53e5e54623cb054b789a772289f8254..d7acf47c48fa383ecded38054b1cf539419408ea 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qabs_s16_x_tied1, svint16_t,
 
 /*
 ** qabs_s16_x_untied:
+**     movprfx z0, z1
 **     sqabs   z0\.h, p0/m, z1\.h
 **     ret
 */
index 5341f78f658857408e04c33d891fd16729fcd31f..fc35d1043a2a46e97919eb5eb24ac31ef4f91d4e 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qabs_s32_x_tied1, svint32_t,
 
 /*
 ** qabs_s32_x_untied:
+**     movprfx z0, z1
 **     sqabs   z0\.s, p0/m, z1\.s
 **     ret
 */
index 3679e659e89e73dd2f090d34dcbaf3d5583df875..b572785c965b351ae150723261d9c1fcb51818af 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qabs_s64_x_tied1, svint64_t,
 
 /*
 ** qabs_s64_x_untied:
+**     movprfx z0, z1
 **     sqabs   z0\.d, p0/m, z1\.d
 **     ret
 */
index dca25f9f1dbe3e6338c57001b03c47374b43ab25..48b85605e15af75cb2e541d02af2582c3d512b0e 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qabs_s8_x_tied1, svint8_t,
 
 /*
 ** qabs_s8_x_untied:
+**     movprfx z0, z1
 **     sqabs   z0\.b, p0/m, z1\.b
 **     ret
 */
index ca78f9df0428f40eeb16173ade004a15a2ef3971..d8b6c87ed7c2e9088af12e4a42a0d8926d1255a7 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qneg_s16_x_tied1, svint16_t,
 
 /*
 ** qneg_s16_x_untied:
+**     movprfx z0, z1
 **     sqneg   z0\.h, p0/m, z1\.h
 **     ret
 */
index 3d2ed877740330fec5c9587103c7fcd899a0a68d..2342504f4d928c744b7e288427d801a666c6baca 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qneg_s32_x_tied1, svint32_t,
 
 /*
 ** qneg_s32_x_untied:
+**     movprfx z0, z1
 **     sqneg   z0\.s, p0/m, z1\.s
 **     ret
 */
index e1379863d1f9d167efffdb74431e5dd85c3ade0b..61ccb981fce3c26b2554989821357a9c36655650 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qneg_s64_x_tied1, svint64_t,
 
 /*
 ** qneg_s64_x_untied:
+**     movprfx z0, z1
 **     sqneg   z0\.d, p0/m, z1\.d
 **     ret
 */
index 13c60efffa90aafecab049683ffa57913ca96f50..c7ec6116bb6482949d9befb67e1f4cdf1b2eebae 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (qneg_s8_x_tied1, svint8_t,
 
 /*
 ** qneg_s8_x_untied:
+**     movprfx z0, z1
 **     sqneg   z0\.b, p0/m, z1\.b
 **     ret
 */
index 17c6a72c37f12e996682b9f06dc0c426d34ff839..c484cec63daf7d4d64cf1a1b907aa71c809d0a67 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (recpe_u32_x_tied1, svuint32_t,
 
 /*
 ** recpe_u32_x_untied:
+**     movprfx z0, z1
 **     urecpe  z0\.s, p0/m, z1\.s
 **     ret
 */
index e9e4fb7dcaded63f76159da8b20c4cf45eb312c9..082a810722ec2fe903912486e21970b9bd66322c 100644 (file)
@@ -73,6 +73,7 @@ TEST_UNIFORM_Z (rsqrte_u32_x_tied1, svuint32_t,
 
 /*
 ** rsqrte_u32_x_untied:
+**     movprfx z0, z1
 **     ursqrte z0\.s, p0/m, z1\.s
 **     ret
 */