else if (sn > hard_regno_nregs (sr, vd->e[sr].mode))
return;
+ /* It is not safe to link DEST into the chain if SRC was defined in some
+ narrower mode M and if M is also narrower than the mode of the first
+ register in the chain. For example:
+ (set (reg:DI r1) (reg:DI r0))
+ (set (reg:HI r2) (reg:HI r1))
+ (set (reg:SI r3) (reg:SI r2)) // Should start a new chain at r3
+ (set (reg:SI r4) (reg:SI r1))
+ (set (reg:SI r5) (reg:SI r4))
+
+ the upper part of r3 is undefined. If we added it to the chain,
+ it may be used to replace r5, which has defined upper bits.
+ See PR98694 for details.
+
+ [A] partial_subreg_p (vd->e[sr].mode, GET_MODE (src))
+ [B] partial_subreg_p (vd->e[sr].mode, vd->e[vd->e[sr].oldest_regno].mode)
+ Condition B is added to catch optimization opportunities of
+
+ (set (reg:HI R1) (reg:HI R0))
+ (set (reg:SI R2) (reg:SI R1)) // [A]
+ (set (reg:DI R3) (reg:DI R2)) // [A]
+ (set (reg:SI R4) (reg:SI R[0-3]))
+ (set (reg:HI R5) (reg:HI R[0-4]))
+
+ in which all registers have only 16 defined bits. */
+ else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src))
+ && partial_subreg_p (vd->e[sr].mode,
+ vd->e[vd->e[sr].oldest_regno].mode))
+ return;
+
/* Link DR at the end of the value chain used by SR. */
vd->e[dr].oldest_regno = vd->e[sr].oldest_regno;
--- /dev/null
+/* PR rtl-optimization/98694 */
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include<immintrin.h>
+typedef short v4hi __attribute__ ((vector_size (8))); /* 8-byte vector of short elements.  */
+typedef int v2si __attribute__ ((vector_size (8))); /* 8-byte vector of int elements.  */
+v4hi b; /* Written by foo with the mask truncated to short.  */
+/* Compare SRC1 and SRC2 bytewise into a 64-bit mask, store the mask
+   truncated to short in every element of B, and return the mask truncated
+   to int in both elements of the result.  The same mask is thus consumed
+   in two different narrower widths.  noipa keeps the function out of
+   interprocedural optimization.  */
+__attribute__ ((noipa))
+v2si
+foo (__m512i src1, __m512i src2)
+{
+ __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2);
+ short s = (short) m; /* Narrowest use of the mask.  */
+ int i = (int)m; /* Wider use; needs bits that S does not carry.  */
+ b = __extension__ (v4hi) {s, s, s, s};
+ return __extension__ (v2si) {i, i};
+}
+/* Return 0 without testing when the CPU lacks AVX512BW.  Otherwise compare
+   an all-zero vector against alternating 0/1 bytes, so the resulting mask
+   has set bits above bit 15, and abort if element 0 of foo's result differs
+   from that mask truncated to int.  */
+int main ()
+{
+ if (!__builtin_cpu_supports ("avx512bw"))
+ return 0;
+
+ __m512i src1 = _mm512_setzero_si512 ();
+ __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 1, 0, 1);
+ __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); /* Reference mask computed outside foo.  */
+ v2si a = foo (src1, src2);
+ if (a[0] != (int)m)
+ __builtin_abort ();
+ return 0;
+}