__builtin_neon_vst1_lanev8bf (__a, __b, __c);
}
+__extension__ extern __inline bfloat16x4x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2_lane_bf16 (const bfloat16_t * __a, bfloat16x4x2_t __b, const int __c)
+{
+  /* View the tuple __b as the opaque __builtin_neon_ti value the builtin
+     takes, call the builtin with __a and lane __c, and view its result
+     as a bfloat16x4x2_t again.  */
+  union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __src, __dst;
+  __src.__i = __b;
+  __dst.__o = __builtin_neon_vld2_lanev4bf (__a, __src.__o, __c);
+  return __dst.__i;
+}
+
+__extension__ extern __inline bfloat16x8x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld2q_lane_bf16 (const bfloat16_t * __a, bfloat16x8x2_t __b, const int __c)
+{
+  /* View the tuple __b as the opaque __builtin_neon_oi value the builtin
+     takes, call the builtin with __a and lane __c, and view its result
+     as a bfloat16x8x2_t again.  */
+  union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __src, __dst;
+  __src.__i = __b;
+  __dst.__o = __builtin_neon_vld2_lanev8bf (__a, __src.__o, __c);
+  return __dst.__i;
+}
+
+__extension__ extern __inline bfloat16x4x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3_lane_bf16 (const bfloat16_t * __a, bfloat16x4x3_t __b, const int __c)
+{
+  /* View the tuple __b as the opaque __builtin_neon_ei value the builtin
+     takes, call the builtin with __a and lane __c, and view its result
+     as a bfloat16x4x3_t again.  */
+  union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __src, __dst;
+  __src.__i = __b;
+  __dst.__o = __builtin_neon_vld3_lanev4bf (__a, __src.__o, __c);
+  return __dst.__i;
+}
+
+__extension__ extern __inline bfloat16x8x3_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld3q_lane_bf16 (const bfloat16_t * __a, bfloat16x8x3_t __b, const int __c)
+{
+  /* View the tuple __b as the opaque __builtin_neon_ci value the builtin
+     takes, call the builtin with __a and lane __c, and view its result
+     as a bfloat16x8x3_t again.  */
+  union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __src, __dst;
+  __src.__i = __b;
+  __dst.__o = __builtin_neon_vld3_lanev8bf (__a, __src.__o, __c);
+  return __dst.__i;
+}
+
+__extension__ extern __inline bfloat16x4x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4_lane_bf16 (const bfloat16_t * __a, bfloat16x4x4_t __b, const int __c)
+{
+  /* View the tuple __b as the opaque __builtin_neon_oi value the builtin
+     takes, call the builtin with __a and lane __c, and view its result
+     as a bfloat16x4x4_t again.  */
+  union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __src, __dst;
+  __src.__i = __b;
+  __dst.__o = __builtin_neon_vld4_lanev4bf (__a, __src.__o, __c);
+  return __dst.__i;
+}
+
+__extension__ extern __inline bfloat16x8x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld4q_lane_bf16 (const bfloat16_t * __a, bfloat16x8x4_t __b, const int __c)
+{
+  /* View the tuple __b as the opaque __builtin_neon_xi value the builtin
+     takes, call the builtin with __a and lane __c, and view its result
+     as a bfloat16x8x4_t again.  */
+  union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __src, __dst;
+  __src.__i = __b;
+  __dst.__o = __builtin_neon_vld4_lanev8bf (__a, __src.__o, __c);
+  return __dst.__i;
+}
+
#pragma GCC pop_options
#ifdef __cplusplus
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf)
VAR13 (LOAD1, vld2,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
-VAR9 (LOAD1LANE, vld2_lane,
- v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1LANE, vld2_lane, /* 11 modes, incl. bf16 v4bf/v8bf.  */
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR8 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst2,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR13 (LOAD1, vld3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
-VAR9 (LOAD1LANE, vld3_lane,
- v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1LANE, vld3_lane, /* 11 modes, incl. bf16 v4bf/v8bf.  */
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR8 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst3,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR13 (LOAD1, vld4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
-VAR9 (LOAD1LANE, vld4_lane,
- v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1LANE, vld4_lane, /* 11 modes, incl. bf16 v4bf/v8bf.  */
+ v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf, v4bf, v8bf)
VAR8 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di, v4bf, v8bf)
VAR13 (STORE1, vst4,
v8qi, v4hi, v4hf, v4bf, v2si, v2sf, di, v16qi, v8hi, v8hf, v8bf, v4si, v4sf)
--- /dev/null
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+**test_vld2_lane_bf16:
+** vld2.16 {d0\[2\], d1\[2\]}, \[r0\]
+** bx lr
+*/
+bfloat16x4x2_t
+test_vld2_lane_bf16 (const bfloat16_t *a, bfloat16x4x2_t b)
+{
+ return vld2_lane_bf16 (a, b, 2); /* Lane index 2 matches the [2] in the expected asm.  */
+}
+
+/*
+**test_vld2q_lane_bf16:
+** vld2.16 {d0\[2\], d2\[2\]}, \[r0\]
+** bx lr
+*/
+bfloat16x8x2_t
+test_vld2q_lane_bf16 (const bfloat16_t *a, bfloat16x8x2_t b)
+{
+ return vld2q_lane_bf16 (a, b, 2); /* Lane index 2 matches the [2] in the expected asm.  */
+}
+
+/*
+**test_vld3_lane_bf16:
+** vld3.16 {d0\[2\], d1\[2\], d2\[2\]}, \[r0\]
+** bx lr
+*/
+bfloat16x4x3_t
+test_vld3_lane_bf16 (const bfloat16_t *a, bfloat16x4x3_t b)
+{
+ return vld3_lane_bf16 (a, b, 2); /* Lane index 2 matches the [2] in the expected asm.  */
+}
+
+/*
+**test_vld3q_lane_bf16:
+** vld3.16 {d0\[2\], d2\[2\], d4\[2\]}, \[r0\]
+** bx lr
+*/
+bfloat16x8x3_t
+test_vld3q_lane_bf16 (const bfloat16_t *a, bfloat16x8x3_t b)
+{
+ return vld3q_lane_bf16 (a, b, 2); /* Lane index 2 matches the [2] in the expected asm.  */
+}
+
+/*
+**test_vld4_lane_bf16:
+** vld4.16 {d0\[2\], d1\[2\], d2\[2\], d3\[2\]}, \[r0\]
+** bx lr
+*/
+bfloat16x4x4_t
+test_vld4_lane_bf16 (const bfloat16_t *a, bfloat16x4x4_t b)
+{
+ return vld4_lane_bf16 (a, b, 2); /* Lane index 2 matches the [2] in the expected asm.  */
+}
+
+/*
+**test_vld4q_lane_bf16:
+** vld4.16 {d0\[2\], d2\[2\], d4\[2\], d6\[2\]}, \[r0\]
+** bx lr
+*/
+bfloat16x8x4_t
+test_vld4q_lane_bf16 (const bfloat16_t *a, bfloat16x8x4_t b)
+{
+ return vld4q_lane_bf16 (a, b, 2); /* Lane index 2 matches the [2] in the expected asm.  */
+}